/*************************************************************************
ALGLIB 3.9.0 (source code generated 2014-12-11)
Copyright (c) Sergey Bochkanov (ALGLIB project).

>>> SOURCE LICENSE >>>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation (www.fsf.org); either version 2 of the
License, or (at your option) any later version.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.

A copy of the GNU General Public License is available at
http://www.fsf.org/licensing/licenses
>>> END OF LICENSE >>>
*************************************************************************/
// Preprocessor directives must each start their own line; otherwise the
// compiler rejects them or treats the rest of the line as part of the directive.
#pragma warning disable 162
#pragma warning disable 219
using System;

public partial class alglib
{
    /*************************************************************************
    Cache-oblivious complex "copy-and-transpose"

    Input parameters:
        M   -   number of rows
        N   -   number of columns
        A   -   source matrix, MxN submatrix is copied and transposed
        IA  -   submatrix offset (row index)
        JA  -   submatrix offset (column index)
        B   -   destination matrix, must be large enough to store result
        IB  -   submatrix offset (row index)
        JB  -   submatrix offset (column index)
    *************************************************************************/
    public static void cmatrixtranspose(int m, int n, complex[,] a, int ia, int ja, ref complex[,] b, int ib, int jb)
    {
        // Public wrapper: every argument is forwarded unchanged to ablas.
        ablas.cmatrixtranspose(m, n, a, ia, ja, ref b, ib, jb);
    }

    /*************************************************************************
    Cache-oblivious real "copy-and-transpose"

    Input parameters:
        M   -   number of rows
        N   -   number of columns
        A   -   source matrix, MxN submatrix is copied and transposed
        IA  -   submatrix offset (row index)
        JA  -   submatrix offset (column index)
        B   -   destination matrix,
must be large enough to store result
        IB  -   submatrix offset (row index)
        JB  -   submatrix offset (column index)
    *************************************************************************/
    public static void rmatrixtranspose(int m, int n, double[,] a, int ia, int ja, ref double[,] b, int ib, int jb)
    {
        // Forwarded to ablas; note that b is handed over without the ref modifier here.
        ablas.rmatrixtranspose(m, n, a, ia, ja, b, ib, jb);
    }

    /*************************************************************************
    This code enforces symmetry of the matrix by copying Upper part to lower
    one (or vice versa).

    INPUT PARAMETERS:
        A       -   matrix
        N       -   number of rows/columns
        IsUpper -   whether we want to copy upper triangle to lower one (True)
                    or vice versa (False).
    *************************************************************************/
    public static void rmatrixenforcesymmetricity(ref double[,] a, int n, bool isupper)
    {
        // Forwarded to ablas; a is handed over without the ref modifier here.
        ablas.rmatrixenforcesymmetricity(a, n, isupper);
    }

    /*************************************************************************
    Copy

    Input parameters:
        M   -   number of rows
        N   -   number of columns
        A   -   source matrix, MxN submatrix is copied and transposed
        IA  -   submatrix offset (row index)
        JA  -   submatrix offset (column index)
        B   -   destination matrix, must be large enough to store result
        IB  -   submatrix offset (row index)
        JB  -   submatrix offset (column index)
    *************************************************************************/
    public static void cmatrixcopy(int m, int n, complex[,] a, int ia, int ja, ref complex[,] b, int ib, int jb)
    {
        // Public wrapper: every argument is forwarded unchanged to ablas.
        ablas.cmatrixcopy(m, n, a, ia, ja, ref b, ib, jb);
    }

    /*************************************************************************
    Copy

    Input parameters:
        M   -   number of rows
        N   -   number of columns
        A   -   source matrix, MxN submatrix is copied and transposed
        IA  -   submatrix offset (row index)
        JA  -   submatrix offset (column index)
        B   -   destination matrix, must be large enough to store result
        IB  -   submatrix offset (row index)
        JB  -   submatrix offset (column index)
*************************************************************************/
    public static void rmatrixcopy(int m, int n, double[,] a, int ia, int ja, ref double[,] b, int ib, int jb)
    {
        // Public wrapper: every argument is forwarded unchanged to ablas.
        ablas.rmatrixcopy(m, n, a, ia, ja, ref b, ib, jb);
    }

    /*************************************************************************
    Rank-1 correction: A := A + u*v'

    INPUT PARAMETERS:
        M   -   number of rows
        N   -   number of columns
        A   -   target matrix, MxN submatrix is updated
        IA  -   submatrix offset (row index)
        JA  -   submatrix offset (column index)
        U   -   vector #1
        IU  -   subvector offset
        V   -   vector #2
        IV  -   subvector offset
    *************************************************************************/
    public static void cmatrixrank1(int m, int n, ref complex[,] a, int ia, int ja, ref complex[] u, int iu, ref complex[] v, int iv)
    {
        // Public wrapper: every argument is forwarded unchanged to ablas.
        ablas.cmatrixrank1(m, n, ref a, ia, ja, ref u, iu, ref v, iv);
    }

    /*************************************************************************
    Rank-1 correction: A := A + u*v'

    INPUT PARAMETERS:
        M   -   number of rows
        N   -   number of columns
        A   -   target matrix, MxN submatrix is updated
        IA  -   submatrix offset (row index)
        JA  -   submatrix offset (column index)
        U   -   vector #1
        IU  -   subvector offset
        V   -   vector #2
        IV  -   subvector offset
    *************************************************************************/
    public static void rmatrixrank1(int m, int n, ref double[,] a, int ia, int ja, ref double[] u, int iu, ref double[] v, int iv)
    {
        // Public wrapper: every argument is forwarded unchanged to ablas.
        ablas.rmatrixrank1(m, n, ref a, ia, ja, ref u, iu, ref v, iv);
    }

    /*************************************************************************
    Matrix-vector product: y := op(A)*x

    INPUT PARAMETERS:
        M   -   number of rows of op(A)
                M>=0
        N   -   number of columns of op(A)
                N>=0
        A   -   target matrix
        IA  -   submatrix offset (row index)
        JA  -   submatrix offset (column index)
        OpA -   operation type:
                * OpA=0     =>  op(A) = A
                * OpA=1     =>  op(A) = A^T
                * OpA=2     =>  op(A) = A^H
        X   -   input vector
        IX  -   subvector offset
        IY  -   subvector offset
        Y   -   preallocated matrix, must be large enough to
store result

    OUTPUT PARAMETERS:
        Y   -   vector which stores result

    if M=0, then subroutine does nothing.
    if N=0, Y is filled by zeros.

      -- ALGLIB routine --
         28.01.2010
         Bochkanov Sergey
    *************************************************************************/
    public static void cmatrixmv(int m, int n, complex[,] a, int ia, int ja, int opa, complex[] x, int ix, ref complex[] y, int iy)
    {
        // Public wrapper: every argument is forwarded unchanged to ablas.
        ablas.cmatrixmv(m, n, a, ia, ja, opa, x, ix, ref y, iy);
    }

    /*************************************************************************
    Matrix-vector product: y := op(A)*x

    INPUT PARAMETERS:
        M   -   number of rows of op(A)
        N   -   number of columns of op(A)
        A   -   target matrix
        IA  -   submatrix offset (row index)
        JA  -   submatrix offset (column index)
        OpA -   operation type:
                * OpA=0     =>  op(A) = A
                * OpA=1     =>  op(A) = A^T
        X   -   input vector
        IX  -   subvector offset
        IY  -   subvector offset
        Y   -   preallocated matrix, must be large enough to store result

    OUTPUT PARAMETERS:
        Y   -   vector which stores result

    if M=0, then subroutine does nothing.
    if N=0, Y is filled by zeros.

      -- ALGLIB routine --
         28.01.2010
         Bochkanov Sergey
    *************************************************************************/
    public static void rmatrixmv(int m, int n, double[,] a, int ia, int ja, int opa, double[] x, int ix, ref double[] y, int iy)
    {
        // Public wrapper: every argument is forwarded unchanged to ablas.
        ablas.rmatrixmv(m, n, a, ia, ja, opa, x, ix, ref y, iy);
    }

    /*************************************************************************
    This subroutine calculates X*op(A^-1) where:
    * X is MxN general matrix
    * A is NxN upper/lower triangular/unitriangular matrix
    * "op" may be identity transformation, transposition, conjugate transposition

    Multiplication result replaces X.
    Cache-oblivious algorithm is used.

    COMMERCIAL EDITION OF ALGLIB:

      ! Commercial version of ALGLIB includes two important improvements of
      ! this function, which can be used from C++ and C#:
      ! * Intel MKL support (lightweight Intel MKL is shipped with ALGLIB)
      ! * multicore support
      !
      !
Intel MKL gives approximately constant (with respect to number of ! worker threads) acceleration factor which depends on CPU being used, ! problem size and "baseline" ALGLIB edition which is used for ! comparison. ! ! Say, on SSE2-capable CPU with N=1024, HPC ALGLIB will be: ! * about 2-3x faster than ALGLIB for C++ without MKL ! * about 7-10x faster than "pure C#" edition of ALGLIB ! Difference in performance will be more striking on newer CPU's with ! support for newer SIMD instructions. ! ! Commercial edition of ALGLIB also supports multithreaded acceleration ! of this function. Because starting/stopping worker thread always ! involves some overhead, parallelism starts to be profitable for N's ! larger than 128. ! ! In order to use multicore features you have to: ! * use commercial version of ALGLIB ! * call this function with "smp_" prefix, which indicates that ! multicore code will be used (for multicore support) ! ! We recommend you to read 'Working with commercial version' section of ! ALGLIB Reference Manual in order to find out how to use performance- ! related features provided by commercial edition of ALGLIB. 
INPUT PARAMETERS
        N       -   matrix size, N>=0
        M       -   matrix size, N>=0
        A       -   matrix, actual matrix is stored in A[I1:I1+N-1,J1:J1+N-1]
        I1      -   submatrix offset
        J1      -   submatrix offset
        IsUpper -   whether matrix is upper triangular
        IsUnit  -   whether matrix is unitriangular
        OpType  -   transformation type:
                    * 0 - no transformation
                    * 1 - transposition
                    * 2 - conjugate transposition
        X       -   matrix, actual matrix is stored in X[I2:I2+M-1,J2:J2+N-1]
        I2      -   submatrix offset
        J2      -   submatrix offset

      -- ALGLIB routine --
         15.12.2009
         Bochkanov Sergey
    *************************************************************************/
    public static void cmatrixrighttrsm(int m, int n, complex[,] a, int i1, int j1, bool isupper, bool isunit, int optype, ref complex[,] x, int i2, int j2)
    {
        // Forwarded to ablas; x is handed over without the ref modifier here.
        ablas.cmatrixrighttrsm(m, n, a, i1, j1, isupper, isunit, optype, x, i2, j2);
    }

    public static void smp_cmatrixrighttrsm(int m, int n, complex[,] a, int i1, int j1, bool isupper, bool isunit, int optype, ref complex[,] x, int i2, int j2)
    {
        // "smp_" variant: same arguments, routed to the _pexec_ entry point of ablas.
        ablas._pexec_cmatrixrighttrsm(m, n, a, i1, j1, isupper, isunit, optype, x, i2, j2);
    }

    /*************************************************************************
    This subroutine calculates op(A^-1)*X where:
    * X is MxN general matrix
    * A is MxM upper/lower triangular/unitriangular matrix
    * "op" may be identity transformation, transposition, conjugate transposition

    Multiplication result replaces X.
    Cache-oblivious algorithm is used.

    COMMERCIAL EDITION OF ALGLIB:

      ! Commercial version of ALGLIB includes two important improvements of
      ! this function, which can be used from C++ and C#:
      ! * Intel MKL support (lightweight Intel MKL is shipped with ALGLIB)
      ! * multicore support
      !
      ! Intel MKL gives approximately constant (with respect to number of
      ! worker threads) acceleration factor which depends on CPU being used,
      ! problem size and "baseline" ALGLIB edition which is used for
      ! comparison.
      !
      ! Say, on SSE2-capable CPU with N=1024, HPC ALGLIB will be:
      !
* about 2-3x faster than ALGLIB for C++ without MKL
      ! * about 7-10x faster than "pure C#" edition of ALGLIB
      ! Difference in performance will be more striking on newer CPU's with
      ! support for newer SIMD instructions.
      !
      ! Commercial edition of ALGLIB also supports multithreaded acceleration
      ! of this function. Because starting/stopping worker thread always
      ! involves some overhead, parallelism starts to be profitable for N's
      ! larger than 128.
      !
      ! In order to use multicore features you have to:
      ! * use commercial version of ALGLIB
      ! * call this function with "smp_" prefix, which indicates that
      !   multicore code will be used (for multicore support)
      !
      ! We recommend you to read 'Working with commercial version' section of
      ! ALGLIB Reference Manual in order to find out how to use performance-
      ! related features provided by commercial edition of ALGLIB.

    INPUT PARAMETERS
        N       -   matrix size, N>=0
        M       -   matrix size, N>=0
        A       -   matrix, actual matrix is stored in A[I1:I1+M-1,J1:J1+M-1]
        I1      -   submatrix offset
        J1      -   submatrix offset
        IsUpper -   whether matrix is upper triangular
        IsUnit  -   whether matrix is unitriangular
        OpType  -   transformation type:
                    * 0 - no transformation
                    * 1 - transposition
                    * 2 - conjugate transposition
        X       -   matrix, actual matrix is stored in X[I2:I2+M-1,J2:J2+N-1]
        I2      -   submatrix offset
        J2      -   submatrix offset

      -- ALGLIB routine --
         15.12.2009
         Bochkanov Sergey
    *************************************************************************/
    public static void cmatrixlefttrsm(int m, int n, complex[,] a, int i1, int j1, bool isupper, bool isunit, int optype, ref complex[,] x, int i2, int j2)
    {
        // Forwarded to ablas; x is handed over without the ref modifier here.
        ablas.cmatrixlefttrsm(m, n, a, i1, j1, isupper, isunit, optype, x, i2, j2);
    }

    public static void smp_cmatrixlefttrsm(int m, int n, complex[,] a, int i1, int j1, bool isupper, bool isunit, int optype, ref complex[,] x, int i2, int j2)
    {
        // "smp_" variant: same arguments, routed to the _pexec_ entry point of ablas.
        ablas._pexec_cmatrixlefttrsm(m, n, a, i1, j1, isupper, isunit, optype, x, i2, j2);
    }
/************************************************************************* This subroutine calculates X*op(A^-1) where: * X is MxN general matrix * A is NxN upper/lower triangular/unitriangular matrix * "op" may be identity transformation, transposition Multiplication result replaces X. Cache-oblivious algorithm is used. COMMERCIAL EDITION OF ALGLIB: ! Commercial version of ALGLIB includes two important improvements of ! this function, which can be used from C++ and C#: ! * Intel MKL support (lightweight Intel MKL is shipped with ALGLIB) ! * multicore support ! ! Intel MKL gives approximately constant (with respect to number of ! worker threads) acceleration factor which depends on CPU being used, ! problem size and "baseline" ALGLIB edition which is used for ! comparison. ! ! Say, on SSE2-capable CPU with N=1024, HPC ALGLIB will be: ! * about 2-3x faster than ALGLIB for C++ without MKL ! * about 7-10x faster than "pure C#" edition of ALGLIB ! Difference in performance will be more striking on newer CPU's with ! support for newer SIMD instructions. ! ! Commercial edition of ALGLIB also supports multithreaded acceleration ! of this function. Because starting/stopping worker thread always ! involves some overhead, parallelism starts to be profitable for N's ! larger than 128. ! ! In order to use multicore features you have to: ! * use commercial version of ALGLIB ! * call this function with "smp_" prefix, which indicates that ! multicore code will be used (for multicore support) ! ! We recommend you to read 'Working with commercial version' section of ! ALGLIB Reference Manual in order to find out how to use performance- ! related features provided by commercial edition of ALGLIB. 
INPUT PARAMETERS
        N       -   matrix size, N>=0
        M       -   matrix size, N>=0
        A       -   matrix, actual matrix is stored in A[I1:I1+N-1,J1:J1+N-1]
        I1      -   submatrix offset
        J1      -   submatrix offset
        IsUpper -   whether matrix is upper triangular
        IsUnit  -   whether matrix is unitriangular
        OpType  -   transformation type:
                    * 0 - no transformation
                    * 1 - transposition
        X       -   matrix, actual matrix is stored in X[I2:I2+M-1,J2:J2+N-1]
        I2      -   submatrix offset
        J2      -   submatrix offset

      -- ALGLIB routine --
         15.12.2009
         Bochkanov Sergey
    *************************************************************************/
    public static void rmatrixrighttrsm(int m, int n, double[,] a, int i1, int j1, bool isupper, bool isunit, int optype, ref double[,] x, int i2, int j2)
    {
        // Forwarded to ablas; x is handed over without the ref modifier here.
        ablas.rmatrixrighttrsm(m, n, a, i1, j1, isupper, isunit, optype, x, i2, j2);
    }

    public static void smp_rmatrixrighttrsm(int m, int n, double[,] a, int i1, int j1, bool isupper, bool isunit, int optype, ref double[,] x, int i2, int j2)
    {
        // "smp_" variant: same arguments, routed to the _pexec_ entry point of ablas.
        ablas._pexec_rmatrixrighttrsm(m, n, a, i1, j1, isupper, isunit, optype, x, i2, j2);
    }

    /*************************************************************************
    This subroutine calculates op(A^-1)*X where:
    * X is MxN general matrix
    * A is MxM upper/lower triangular/unitriangular matrix
    * "op" may be identity transformation, transposition

    Multiplication result replaces X.
    Cache-oblivious algorithm is used.

    COMMERCIAL EDITION OF ALGLIB:

      ! Commercial version of ALGLIB includes two important improvements of
      ! this function, which can be used from C++ and C#:
      ! * Intel MKL support (lightweight Intel MKL is shipped with ALGLIB)
      ! * multicore support
      !
      ! Intel MKL gives approximately constant (with respect to number of
      ! worker threads) acceleration factor which depends on CPU being used,
      ! problem size and "baseline" ALGLIB edition which is used for
      ! comparison.
      !
      ! Say, on SSE2-capable CPU with N=1024, HPC ALGLIB will be:
      ! * about 2-3x faster than ALGLIB for C++ without MKL
      !
* about 7-10x faster than "pure C#" edition of ALGLIB
      ! Difference in performance will be more striking on newer CPU's with
      ! support for newer SIMD instructions.
      !
      ! Commercial edition of ALGLIB also supports multithreaded acceleration
      ! of this function. Because starting/stopping worker thread always
      ! involves some overhead, parallelism starts to be profitable for N's
      ! larger than 128.
      !
      ! In order to use multicore features you have to:
      ! * use commercial version of ALGLIB
      ! * call this function with "smp_" prefix, which indicates that
      !   multicore code will be used (for multicore support)
      !
      ! We recommend you to read 'Working with commercial version' section of
      ! ALGLIB Reference Manual in order to find out how to use performance-
      ! related features provided by commercial edition of ALGLIB.

    INPUT PARAMETERS
        N       -   matrix size, N>=0
        M       -   matrix size, N>=0
        A       -   matrix, actual matrix is stored in A[I1:I1+M-1,J1:J1+M-1]
        I1      -   submatrix offset
        J1      -   submatrix offset
        IsUpper -   whether matrix is upper triangular
        IsUnit  -   whether matrix is unitriangular
        OpType  -   transformation type:
                    * 0 - no transformation
                    * 1 - transposition
        X       -   matrix, actual matrix is stored in X[I2:I2+M-1,J2:J2+N-1]
        I2      -   submatrix offset
        J2      -   submatrix offset

      -- ALGLIB routine --
         15.12.2009
         Bochkanov Sergey
    *************************************************************************/
    public static void rmatrixlefttrsm(int m, int n, double[,] a, int i1, int j1, bool isupper, bool isunit, int optype, ref double[,] x, int i2, int j2)
    {
        // Forwarded to ablas; x is handed over without the ref modifier here.
        ablas.rmatrixlefttrsm(m, n, a, i1, j1, isupper, isunit, optype, x, i2, j2);
    }

    public static void smp_rmatrixlefttrsm(int m, int n, double[,] a, int i1, int j1, bool isupper, bool isunit, int optype, ref double[,] x, int i2, int j2)
    {
        // "smp_" variant: same arguments, routed to the _pexec_ entry point of ablas.
        ablas._pexec_rmatrixlefttrsm(m, n, a, i1, j1, isupper, isunit, optype, x, i2, j2);
    }

    /*************************************************************************
    This subroutine calculates C=alpha*A*A^H+beta*C or C=alpha*A^H*A+beta*C
where: * C is NxN Hermitian matrix given by its upper/lower triangle * A is NxK matrix when A*A^H is calculated, KxN matrix otherwise Additional info: * cache-oblivious algorithm is used. * multiplication result replaces C. If Beta=0, C elements are not used in calculations (not multiplied by zero - just not referenced) * if Alpha=0, A is not used (not multiplied by zero - just not referenced) * if both Beta and Alpha are zero, C is filled by zeros. COMMERCIAL EDITION OF ALGLIB: ! Commercial version of ALGLIB includes two important improvements of ! this function, which can be used from C++ and C#: ! * Intel MKL support (lightweight Intel MKL is shipped with ALGLIB) ! * multicore support ! ! Intel MKL gives approximately constant (with respect to number of ! worker threads) acceleration factor which depends on CPU being used, ! problem size and "baseline" ALGLIB edition which is used for ! comparison. ! ! Say, on SSE2-capable CPU with N=1024, HPC ALGLIB will be: ! * about 2-3x faster than ALGLIB for C++ without MKL ! * about 7-10x faster than "pure C#" edition of ALGLIB ! Difference in performance will be more striking on newer CPU's with ! support for newer SIMD instructions. ! ! Commercial edition of ALGLIB also supports multithreaded acceleration ! of this function. Because starting/stopping worker thread always ! involves some overhead, parallelism starts to be profitable for N's ! larger than 128. ! ! In order to use multicore features you have to: ! * use commercial version of ALGLIB ! * call this function with "smp_" prefix, which indicates that ! multicore code will be used (for multicore support) ! ! We recommend you to read 'Working with commercial version' section of ! ALGLIB Reference Manual in order to find out how to use performance- ! related features provided by commercial edition of ALGLIB. 
INPUT PARAMETERS
        N       -   matrix size, N>=0
        K       -   matrix size, K>=0
        Alpha   -   coefficient
        A       -   matrix
        IA      -   submatrix offset (row index)
        JA      -   submatrix offset (column index)
        OpTypeA -   multiplication type:
                    * 0 - A*A^H is calculated
                    * 2 - A^H*A is calculated
        Beta    -   coefficient
        C       -   preallocated input/output matrix
        IC      -   submatrix offset (row index)
        JC      -   submatrix offset (column index)
        IsUpper -   whether upper or lower triangle of C is updated;
                    this function updates only one half of C, leaving
                    other half unchanged (not referenced at all).

      -- ALGLIB routine --
         16.12.2009
         Bochkanov Sergey
    *************************************************************************/
    public static void cmatrixherk(int n, int k, double alpha, complex[,] a, int ia, int ja, int optypea, double beta, ref complex[,] c, int ic, int jc, bool isupper)
    {
        // Forwarded to ablas; c is handed over without the ref modifier here.
        ablas.cmatrixherk(n, k, alpha, a, ia, ja, optypea, beta, c, ic, jc, isupper);
    }

    public static void smp_cmatrixherk(int n, int k, double alpha, complex[,] a, int ia, int ja, int optypea, double beta, ref complex[,] c, int ic, int jc, bool isupper)
    {
        // "smp_" variant: same arguments, routed to the _pexec_ entry point of ablas.
        ablas._pexec_cmatrixherk(n, k, alpha, a, ia, ja, optypea, beta, c, ic, jc, isupper);
    }

    /*************************************************************************
    This subroutine calculates C=alpha*A*A^T+beta*C or C=alpha*A^T*A+beta*C
    where:
    * C is NxN symmetric matrix given by its upper/lower triangle
    * A is NxK matrix when A*A^T is calculated, KxN matrix otherwise

    Additional info:
    * cache-oblivious algorithm is used.
    * multiplication result replaces C. If Beta=0, C elements are not used in
      calculations (not multiplied by zero - just not referenced)
    * if Alpha=0, A is not used (not multiplied by zero - just not referenced)
    * if both Beta and Alpha are zero, C is filled by zeros.

    COMMERCIAL EDITION OF ALGLIB:

      ! Commercial version of ALGLIB includes two important improvements of
      ! this function, which can be used from C++ and C#:
      ! * Intel MKL support (lightweight Intel MKL is shipped with ALGLIB)
      !
* multicore support ! ! Intel MKL gives approximately constant (with respect to number of ! worker threads) acceleration factor which depends on CPU being used, ! problem size and "baseline" ALGLIB edition which is used for ! comparison. ! ! Say, on SSE2-capable CPU with N=1024, HPC ALGLIB will be: ! * about 2-3x faster than ALGLIB for C++ without MKL ! * about 7-10x faster than "pure C#" edition of ALGLIB ! Difference in performance will be more striking on newer CPU's with ! support for newer SIMD instructions. ! ! Commercial edition of ALGLIB also supports multithreaded acceleration ! of this function. Because starting/stopping worker thread always ! involves some overhead, parallelism starts to be profitable for N's ! larger than 128. ! ! In order to use multicore features you have to: ! * use commercial version of ALGLIB ! * call this function with "smp_" prefix, which indicates that ! multicore code will be used (for multicore support) ! ! We recommend you to read 'Working with commercial version' section of ! ALGLIB Reference Manual in order to find out how to use performance- ! related features provided by commercial edition of ALGLIB. 
INPUT PARAMETERS
        N       -   matrix size, N>=0
        K       -   matrix size, K>=0
        Alpha   -   coefficient
        A       -   matrix
        IA      -   submatrix offset (row index)
        JA      -   submatrix offset (column index)
        OpTypeA -   multiplication type:
                    * 0 - A*A^T is calculated
                    * 2 - A^T*A is calculated
        Beta    -   coefficient
        C       -   preallocated input/output matrix
        IC      -   submatrix offset (row index)
        JC      -   submatrix offset (column index)
        IsUpper -   whether C is upper triangular or lower triangular

      -- ALGLIB routine --
         16.12.2009
         Bochkanov Sergey
    *************************************************************************/
    public static void rmatrixsyrk(int n, int k, double alpha, double[,] a, int ia, int ja, int optypea, double beta, ref double[,] c, int ic, int jc, bool isupper)
    {
        // Forwarded to ablas; c is handed over without the ref modifier here.
        ablas.rmatrixsyrk(n, k, alpha, a, ia, ja, optypea, beta, c, ic, jc, isupper);
    }

    public static void smp_rmatrixsyrk(int n, int k, double alpha, double[,] a, int ia, int ja, int optypea, double beta, ref double[,] c, int ic, int jc, bool isupper)
    {
        // "smp_" variant: same arguments, routed to the _pexec_ entry point of ablas.
        ablas._pexec_rmatrixsyrk(n, k, alpha, a, ia, ja, optypea, beta, c, ic, jc, isupper);
    }

    /*************************************************************************
    This subroutine calculates C = alpha*op1(A)*op2(B) +beta*C where:
    * C is MxN general matrix
    * op1(A) is MxK matrix
    * op2(B) is KxN matrix
    * "op" may be identity transformation, transposition, conjugate transposition

    Additional info:
    * cache-oblivious algorithm is used.
    * multiplication result replaces C. If Beta=0, C elements are not used in
      calculations (not multiplied by zero - just not referenced)
    * if Alpha=0, A is not used (not multiplied by zero - just not referenced)
    * if both Beta and Alpha are zero, C is filled by zeros.

    COMMERCIAL EDITION OF ALGLIB:

      ! Commercial version of ALGLIB includes two important improvements of
      ! this function, which can be used from C++ and C#:
      ! * Intel MKL support (lightweight Intel MKL is shipped with ALGLIB)
      ! * multicore support
      !
      ! Intel MKL gives approximately constant (with respect to number of
      !
worker threads) acceleration factor which depends on CPU being used, ! problem size and "baseline" ALGLIB edition which is used for ! comparison. ! ! Say, on SSE2-capable CPU with N=1024, HPC ALGLIB will be: ! * about 2-3x faster than ALGLIB for C++ without MKL ! * about 7-10x faster than "pure C#" edition of ALGLIB ! Difference in performance will be more striking on newer CPU's with ! support for newer SIMD instructions. ! ! Commercial edition of ALGLIB also supports multithreaded acceleration ! of this function. Because starting/stopping worker thread always ! involves some overhead, parallelism starts to be profitable for N's ! larger than 128. ! ! In order to use multicore features you have to: ! * use commercial version of ALGLIB ! * call this function with "smp_" prefix, which indicates that ! multicore code will be used (for multicore support) ! ! We recommend you to read 'Working with commercial version' section of ! ALGLIB Reference Manual in order to find out how to use performance- ! related features provided by commercial edition of ALGLIB. IMPORTANT: This function does NOT preallocate output matrix C, it MUST be preallocated by caller prior to calling this function. In case C does not have enough space to store result, exception will be generated. 
INPUT PARAMETERS
        M       -   matrix size, M>0
        N       -   matrix size, N>0
        K       -   matrix size, K>0
        Alpha   -   coefficient
        A       -   matrix
        IA      -   submatrix offset
        JA      -   submatrix offset
        OpTypeA -   transformation type:
                    * 0 - no transformation
                    * 1 - transposition
                    * 2 - conjugate transposition
        B       -   matrix
        IB      -   submatrix offset
        JB      -   submatrix offset
        OpTypeB -   transformation type:
                    * 0 - no transformation
                    * 1 - transposition
                    * 2 - conjugate transposition
        Beta    -   coefficient
        C       -   matrix (PREALLOCATED, large enough to store result)
        IC      -   submatrix offset
        JC      -   submatrix offset

      -- ALGLIB routine --
         16.12.2009
         Bochkanov Sergey
    *************************************************************************/
    public static void cmatrixgemm(int m, int n, int k, complex alpha, complex[,] a, int ia, int ja, int optypea, complex[,] b, int ib, int jb, int optypeb, complex beta, ref complex[,] c, int ic, int jc)
    {
        // Forwarded to ablas; c is handed over without the ref modifier here.
        ablas.cmatrixgemm(m, n, k, alpha, a, ia, ja, optypea, b, ib, jb, optypeb, beta, c, ic, jc);
    }

    public static void smp_cmatrixgemm(int m, int n, int k, complex alpha, complex[,] a, int ia, int ja, int optypea, complex[,] b, int ib, int jb, int optypeb, complex beta, ref complex[,] c, int ic, int jc)
    {
        // "smp_" variant: same arguments, routed to the _pexec_ entry point of ablas.
        ablas._pexec_cmatrixgemm(m, n, k, alpha, a, ia, ja, optypea, b, ib, jb, optypeb, beta, c, ic, jc);
    }

    /*************************************************************************
    This subroutine calculates C = alpha*op1(A)*op2(B) +beta*C where:
    * C is MxN general matrix
    * op1(A) is MxK matrix
    * op2(B) is KxN matrix
    * "op" may be identity transformation, transposition

    Additional info:
    * cache-oblivious algorithm is used.
    * multiplication result replaces C. If Beta=0, C elements are not used in
      calculations (not multiplied by zero - just not referenced)
    * if Alpha=0, A is not used (not multiplied by zero - just not referenced)
    * if both Beta and Alpha are zero, C is filled by zeros.

    COMMERCIAL EDITION OF ALGLIB:

      ! Commercial version of ALGLIB includes two important improvements of
      !
this function, which can be used from C++ and C#: ! * Intel MKL support (lightweight Intel MKL is shipped with ALGLIB) ! * multicore support ! ! Intel MKL gives approximately constant (with respect to number of ! worker threads) acceleration factor which depends on CPU being used, ! problem size and "baseline" ALGLIB edition which is used for ! comparison. ! ! Say, on SSE2-capable CPU with N=1024, HPC ALGLIB will be: ! * about 2-3x faster than ALGLIB for C++ without MKL ! * about 7-10x faster than "pure C#" edition of ALGLIB ! Difference in performance will be more striking on newer CPU's with ! support for newer SIMD instructions. ! ! Commercial edition of ALGLIB also supports multithreaded acceleration ! of this function. Because starting/stopping worker thread always ! involves some overhead, parallelism starts to be profitable for N's ! larger than 128. ! ! In order to use multicore features you have to: ! * use commercial version of ALGLIB ! * call this function with "smp_" prefix, which indicates that ! multicore code will be used (for multicore support) ! ! We recommend you to read 'Working with commercial version' section of ! ALGLIB Reference Manual in order to find out how to use performance- ! related features provided by commercial edition of ALGLIB. IMPORTANT: This function does NOT preallocate output matrix C, it MUST be preallocated by caller prior to calling this function. In case C does not have enough space to store result, exception will be generated. 
INPUT PARAMETERS
        M       -   matrix size, M>0
        N       -   matrix size, N>0
        K       -   matrix size, K>0
        Alpha   -   coefficient
        A       -   matrix
        IA      -   submatrix offset
        JA      -   submatrix offset
        OpTypeA -   transformation type:
                    * 0 - no transformation
                    * 1 - transposition
        B       -   matrix
        IB      -   submatrix offset
        JB      -   submatrix offset
        OpTypeB -   transformation type:
                    * 0 - no transformation
                    * 1 - transposition
        Beta    -   coefficient
        C       -   PREALLOCATED output matrix, large enough to store result
        IC      -   submatrix offset
        JC      -   submatrix offset

      -- ALGLIB routine --
         2009-2013
         Bochkanov Sergey
    *************************************************************************/
    public static void rmatrixgemm(int m, int n, int k, double alpha, double[,] a, int ia, int ja, int optypea, double[,] b, int ib, int jb, int optypeb, double beta, ref double[,] c, int ic, int jc)
    {
        // Forwarded to ablas; c is handed over without the ref modifier here.
        ablas.rmatrixgemm(m, n, k, alpha, a, ia, ja, optypea, b, ib, jb, optypeb, beta, c, ic, jc);
    }

    public static void smp_rmatrixgemm(int m, int n, int k, double alpha, double[,] a, int ia, int ja, int optypea, double[,] b, int ib, int jb, int optypeb, double beta, ref double[,] c, int ic, int jc)
    {
        // "smp_" variant: same arguments, routed to the _pexec_ entry point of ablas.
        ablas._pexec_rmatrixgemm(m, n, k, alpha, a, ia, ja, optypea, b, ib, jb, optypeb, beta, c, ic, jc);
    }

    /*************************************************************************
    This subroutine is an older version of CMatrixHERK(), one with wrong name
    (it is HErmitian update, not SYmmetric). It is left here for backward
    compatibility.
-- ALGLIB routine --
         16.12.2009
         Bochkanov Sergey
    *************************************************************************/
    public static void cmatrixsyrk(int n, int k, double alpha, complex[,] a, int ia, int ja, int optypea, double beta, ref complex[,] c, int ic, int jc, bool isupper)
    {
        // Forwarded to ablas; c is handed over without the ref modifier here.
        ablas.cmatrixsyrk(n, k, alpha, a, ia, ja, optypea, beta, c, ic, jc, isupper);
    }

    public static void smp_cmatrixsyrk(int n, int k, double alpha, complex[,] a, int ia, int ja, int optypea, double beta, ref complex[,] c, int ic, int jc, bool isupper)
    {
        // "smp_" variant: same arguments, routed to the _pexec_ entry point of ablas.
        ablas._pexec_cmatrixsyrk(n, k, alpha, a, ia, ja, optypea, beta, c, ic, jc, isupper);
    }

}

public partial class alglib
{
    /*************************************************************************
    QR decomposition of a rectangular matrix of size MxN

    COMMERCIAL EDITION OF ALGLIB:

      ! Commercial version of ALGLIB includes two important improvements of
      ! this function, which can be used from C++ and C#:
      ! * Intel MKL support (lightweight Intel MKL is shipped with ALGLIB)
      ! * multicore support
      !
      ! Intel MKL gives approximately constant (with respect to number of
      ! worker threads) acceleration factor which depends on CPU being used,
      ! problem size and "baseline" ALGLIB edition which is used for
      ! comparison.
      !
      ! Say, on SSE2-capable CPU with N=1024, HPC ALGLIB will be:
      ! * about 2-3x faster than ALGLIB for C++ without MKL
      ! * about 7-10x faster than "pure C#" edition of ALGLIB
      ! Difference in performance will be more striking on newer CPU's with
      ! support for newer SIMD instructions. Generally, MKL accelerates any
      ! problem whose size is at least 128, with best efficiency achieved for
      ! N's larger than 512.
      !
      ! Commercial edition of ALGLIB also supports multithreaded acceleration
      ! of this function. We should note that QR decomposition is harder to
      ! parallelize than, say, matrix-matrix product - this algorithm has
      ! many internal synchronization points which can not be avoided. However
      !
parallelism starts to be profitable starting from N=512, achieving ! near-linear speedup for N=4096 or higher. ! ! In order to use multicore features you have to: ! * use commercial version of ALGLIB ! * call this function with "smp_" prefix, which indicates that ! multicore code will be used (for multicore support) ! ! We recommend you to read 'Working with commercial version' section of ! ALGLIB Reference Manual in order to find out how to use performance- ! related features provided by commercial edition of ALGLIB. Input parameters: A - matrix A whose indexes range within [0..M-1, 0..N-1]. M - number of rows in matrix A. N - number of columns in matrix A. Output parameters: A - matrices Q and R in compact form (see below). Tau - array of scalar factors which are used to form matrix Q. Array whose index ranges within [0.. Min(M-1,N-1)]. Matrix A is represented as A = QR, where Q is an orthogonal matrix of size MxM, R - upper triangular (or upper trapezoid) matrix of size M x N. The elements of matrix R are located on and above the main diagonal of matrix A. The elements which are located in Tau array and below the main diagonal of matrix A are used to form matrix Q as follows: Matrix Q is represented as a product of elementary reflections Q = H(0)*H(1)*...*H(k-1), where k = min(m,n), and each H(i) is in the form H(i) = 1 - tau * v * (v^T) where tau is a scalar stored in Tau[I]; v - real vector, so that v(0:i-1) = 0, v(i) = 1, v(i+1:m-1) stored in A(i+1:m-1,i).
-- ALGLIB routine -- 17.02.2010 Bochkanov Sergey *************************************************************************/ public static void rmatrixqr(ref double[,] a, int m, int n, out double[] tau) { tau = new double[0]; ortfac.rmatrixqr(ref a, m, n, ref tau); return; } public static void smp_rmatrixqr(ref double[,] a, int m, int n, out double[] tau) { tau = new double[0]; ortfac._pexec_rmatrixqr(ref a, m, n, ref tau); return; } /************************************************************************* LQ decomposition of a rectangular matrix of size MxN COMMERCIAL EDITION OF ALGLIB: ! Commercial version of ALGLIB includes two important improvements of ! this function, which can be used from C++ and C#: ! * Intel MKL support (lightweight Intel MKL is shipped with ALGLIB) ! * multicore support ! ! Intel MKL gives approximately constant (with respect to number of ! worker threads) acceleration factor which depends on CPU being used, ! problem size and "baseline" ALGLIB edition which is used for ! comparison. ! ! Say, on SSE2-capable CPU with N=1024, HPC ALGLIB will be: ! * about 2-3x faster than ALGLIB for C++ without MKL ! * about 7-10x faster than "pure C#" edition of ALGLIB ! Difference in performance will be more striking on newer CPU's with ! support for newer SIMD instructions. Generally, MKL accelerates any ! problem whose size is at least 128, with best efficiency achieved for ! N's larger than 512. ! ! Commercial edition of ALGLIB also supports multithreaded acceleration ! of this function. We should note that QP decomposition is harder to ! parallelize than, say, matrix-matrix product - this algorithm has ! many internal synchronization points which can not be avoided. However ! parallelism starts to be profitable starting from N=512, achieving ! near-linear speedup for N=4096 or higher. ! ! In order to use multicore features you have to: ! * use commercial version of ALGLIB ! * call this function with "smp_" prefix, which indicates that ! 
multicore code will be used (for multicore support)
      !
      ! We recommend you to read 'Working with commercial version' section of
      ! ALGLIB Reference Manual in order to find out how to use performance-
      ! related features provided by commercial edition of ALGLIB.

    Input parameters:
        A   -   matrix A whose indexes range within [0..M-1, 0..N-1].
        M   -   number of rows in matrix A.
        N   -   number of columns in matrix A.

    Output parameters:
        A   -   matrices L and Q in compact form (see below)
        Tau -   array of scalar factors which are used to form matrix Q.
                Array whose index ranges within [0..Min(M,N)-1].

    Matrix A is represented as A = LQ, where Q is an orthogonal matrix of size
    NxN, L - lower triangular (or lower trapezoid) matrix of size M x N.

    The elements of matrix L are located on and below the main diagonal of
    matrix A. The elements which are located in Tau array and above the main
    diagonal of matrix A are used to form matrix Q as follows:

    Matrix Q is represented as a product of elementary reflections

    Q = H(k-1)*H(k-2)*...*H(1)*H(0),

    where k = min(m,n), and each H(i) is of the form

    H(i) = 1 - tau * v * (v^T)

    where tau is a scalar stored in Tau[I]; v - real vector, so that
    v(0:i-1)=0, v(i) = 1, v(i+1:n-1) stored in A(i,i+1:n-1).

      -- ALGLIB routine --
         17.02.2010
         Bochkanov Sergey
    *************************************************************************/
    public static void rmatrixlq(ref double[,] a, int m, int n, out double[] tau)
    {
        // The out parameter must be assigned before it can be passed by ref.
        tau = new double[0];
        ortfac.rmatrixlq(ref a, m, n, ref tau);
    }

    // "smp_" counterpart of rmatrixlq: forwards to ortfac._pexec_rmatrixlq.
    public static void smp_rmatrixlq(ref double[,] a, int m, int n, out double[] tau)
    {
        tau = new double[0];
        ortfac._pexec_rmatrixlq(ref a, m, n, ref tau);
    }

    /*************************************************************************
    QR decomposition of a rectangular complex matrix of size MxN

    COMMERCIAL EDITION OF ALGLIB:

      ! Commercial version of ALGLIB includes two important improvements of
      ! this function, which can be used from C++ and C#:
      !
* Intel MKL support (lightweight Intel MKL is shipped with ALGLIB) ! * multicore support ! ! Intel MKL gives approximately constant (with respect to number of ! worker threads) acceleration factor which depends on CPU being used, ! problem size and "baseline" ALGLIB edition which is used for ! comparison. ! ! Say, on SSE2-capable CPU with N=1024, HPC ALGLIB will be: ! * about 2-3x faster than ALGLIB for C++ without MKL ! * about 7-10x faster than "pure C#" edition of ALGLIB ! Difference in performance will be more striking on newer CPU's with ! support for newer SIMD instructions. Generally, MKL accelerates any ! problem whose size is at least 128, with best efficiency achieved for ! N's larger than 512. ! ! Commercial edition of ALGLIB also supports multithreaded acceleration ! of this function. We should note that QR decomposition is harder to ! parallelize than, say, matrix-matrix product - this algorithm has ! many internal synchronization points which can not be avoided. However ! parallelism starts to be profitable starting from N=512, achieving ! near-linear speedup for N=4096 or higher. ! ! In order to use multicore features you have to: ! * use commercial version of ALGLIB ! * call this function with "smp_" prefix, which indicates that ! multicore code will be used (for multicore support) ! ! We recommend you to read 'Working with commercial version' section of ! ALGLIB Reference Manual in order to find out how to use performance- ! related features provided by commercial edition of ALGLIB. Input parameters: A - matrix A whose indexes range within [0..M-1, 0..N-1] M - number of rows in matrix A. N - number of columns in matrix A. Output parameters: A - matrices Q and R in compact form Tau - array of scalar factors which are used to form matrix Q. Array whose indexes range within [0.. Min(M,N)-1] Matrix A is represented as A = QR, where Q is an orthogonal matrix of size MxM, R - upper triangular (or upper trapezoid) matrix of size MxN.
-- LAPACK routine (version 3.0) -- Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., Courant Institute, Argonne National Lab, and Rice University September 30, 1994 *************************************************************************/ public static void cmatrixqr(ref complex[,] a, int m, int n, out complex[] tau) { tau = new complex[0]; ortfac.cmatrixqr(ref a, m, n, ref tau); return; } public static void smp_cmatrixqr(ref complex[,] a, int m, int n, out complex[] tau) { tau = new complex[0]; ortfac._pexec_cmatrixqr(ref a, m, n, ref tau); return; } /************************************************************************* LQ decomposition of a rectangular complex matrix of size MxN COMMERCIAL EDITION OF ALGLIB: ! Commercial version of ALGLIB includes two important improvements of ! this function, which can be used from C++ and C#: ! * Intel MKL support (lightweight Intel MKL is shipped with ALGLIB) ! * multicore support ! ! Intel MKL gives approximately constant (with respect to number of ! worker threads) acceleration factor which depends on CPU being used, ! problem size and "baseline" ALGLIB edition which is used for ! comparison. ! ! Say, on SSE2-capable CPU with N=1024, HPC ALGLIB will be: ! * about 2-3x faster than ALGLIB for C++ without MKL ! * about 7-10x faster than "pure C#" edition of ALGLIB ! Difference in performance will be more striking on newer CPU's with ! support for newer SIMD instructions. Generally, MKL accelerates any ! problem whose size is at least 128, with best efficiency achieved for ! N's larger than 512. ! ! Commercial edition of ALGLIB also supports multithreaded acceleration ! of this function. We should note that QP decomposition is harder to ! parallelize than, say, matrix-matrix product - this algorithm has ! many internal synchronization points which can not be avoided. However ! parallelism starts to be profitable starting from N=512, achieving ! near-linear speedup for N=4096 or higher. ! ! 
In order to use multicore features you have to:
      ! * use commercial version of ALGLIB
      ! * call this function with "smp_" prefix, which indicates that
      ! multicore code will be used (for multicore support)
      !
      ! We recommend you to read 'Working with commercial version' section of
      ! ALGLIB Reference Manual in order to find out how to use performance-
      ! related features provided by commercial edition of ALGLIB.

    Input parameters:
        A   -   matrix A whose indexes range within [0..M-1, 0..N-1]
        M   -   number of rows in matrix A.
        N   -   number of columns in matrix A.

    Output parameters:
        A   -   matrices Q and L in compact form
        Tau -   array of scalar factors which are used to form matrix Q.
                Array whose indexes range within [0.. Min(M,N)-1]

    Matrix A is represented as A = LQ, where Q is an orthogonal matrix of size
    NxN, L - lower triangular (or lower trapezoid) matrix of size MxN.

      -- LAPACK routine (version 3.0) --
         Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
         Courant Institute, Argonne National Lab, and Rice University
         September 30, 1994
    *************************************************************************/
    public static void cmatrixlq(ref complex[,] a, int m, int n, out complex[] tau)
    {
        // The out parameter must be assigned before it can be passed by ref.
        tau = new complex[0];
        ortfac.cmatrixlq(ref a, m, n, ref tau);
    }

    // "smp_" counterpart of cmatrixlq: forwards to ortfac._pexec_cmatrixlq.
    public static void smp_cmatrixlq(ref complex[,] a, int m, int n, out complex[] tau)
    {
        tau = new complex[0];
        ortfac._pexec_cmatrixlq(ref a, m, n, ref tau);
    }

    /*************************************************************************
    Partial unpacking of matrix Q from the QR decomposition of a matrix A

    COMMERCIAL EDITION OF ALGLIB:

      ! Commercial version of ALGLIB includes two important improvements of
      ! this function, which can be used from C++ and C#:
      ! * Intel MKL support (lightweight Intel MKL is shipped with ALGLIB)
      ! * multicore support
      !
      ! Intel MKL gives approximately constant (with respect to number of
      ! worker threads) acceleration factor which depends on CPU being used,
      !
problem size and "baseline" ALGLIB edition which is used for ! comparison. ! ! Say, on SSE2-capable CPU with N=1024, HPC ALGLIB will be: ! * about 2-3x faster than ALGLIB for C++ without MKL ! * about 7-10x faster than "pure C#" edition of ALGLIB ! Difference in performance will be more striking on newer CPU's with ! support for newer SIMD instructions. Generally, MKL accelerates any ! problem whose size is at least 128, with best efficiency achieved for ! N's larger than 512. ! ! Commercial edition of ALGLIB also supports multithreaded acceleration ! of this function. We should note that QR decomposition is harder to ! parallelize than, say, matrix-matrix product - this algorithm has ! many internal synchronization points which can not be avoided. However ! parallelism starts to be profitable starting from N=512, achieving ! near-linear speedup for N=4096 or higher. ! ! In order to use multicore features you have to: ! * use commercial version of ALGLIB ! * call this function with "smp_" prefix, which indicates that ! multicore code will be used (for multicore support) ! ! We recommend you to read 'Working with commercial version' section of ! ALGLIB Reference Manual in order to find out how to use performance- ! related features provided by commercial edition of ALGLIB. Input parameters: A - matrices Q and R in compact form. Output of RMatrixQR subroutine. M - number of rows in given matrix A. M>=0. N - number of columns in given matrix A. N>=0. Tau - scalar factors which are used to form Q. Output of the RMatrixQR subroutine. QColumns - required number of columns of matrix Q. M>=QColumns>=0. Output parameters: Q - first QColumns columns of matrix Q. Array whose indexes range within [0..M-1, 0..QColumns-1]. If QColumns=0, the array remains unchanged.
-- ALGLIB routine -- 17.02.2010 Bochkanov Sergey *************************************************************************/ public static void rmatrixqrunpackq(double[,] a, int m, int n, double[] tau, int qcolumns, out double[,] q) { q = new double[0,0]; ortfac.rmatrixqrunpackq(a, m, n, tau, qcolumns, ref q); return; } public static void smp_rmatrixqrunpackq(double[,] a, int m, int n, double[] tau, int qcolumns, out double[,] q) { q = new double[0,0]; ortfac._pexec_rmatrixqrunpackq(a, m, n, tau, qcolumns, ref q); return; } /************************************************************************* Unpacking of matrix R from the QR decomposition of a matrix A Input parameters: A - matrices Q and R in compact form. Output of RMatrixQR subroutine. M - number of rows in given matrix A. M>=0. N - number of columns in given matrix A. N>=0. Output parameters: R - matrix R, array[0..M-1, 0..N-1]. -- ALGLIB routine -- 17.02.2010 Bochkanov Sergey *************************************************************************/ public static void rmatrixqrunpackr(double[,] a, int m, int n, out double[,] r) { r = new double[0,0]; ortfac.rmatrixqrunpackr(a, m, n, ref r); return; } /************************************************************************* Partial unpacking of matrix Q from the LQ decomposition of a matrix A COMMERCIAL EDITION OF ALGLIB: ! Commercial version of ALGLIB includes two important improvements of ! this function, which can be used from C++ and C#: ! * Intel MKL support (lightweight Intel MKL is shipped with ALGLIB) ! * multicore support ! ! Intel MKL gives approximately constant (with respect to number of ! worker threads) acceleration factor which depends on CPU being used, ! problem size and "baseline" ALGLIB edition which is used for ! comparison. ! ! Say, on SSE2-capable CPU with N=1024, HPC ALGLIB will be: ! * about 2-3x faster than ALGLIB for C++ without MKL ! * about 7-10x faster than "pure C#" edition of ALGLIB ! 
Difference in performance will be more striking on newer CPU's with ! support for newer SIMD instructions. Generally, MKL accelerates any ! problem whose size is at least 128, with best efficiency achieved for ! N's larger than 512. ! ! Commercial edition of ALGLIB also supports multithreaded acceleration ! of this function. We should note that LQ decomposition is harder to ! parallelize than, say, matrix-matrix product - this algorithm has ! many internal synchronization points which can not be avoided. However ! parallelism starts to be profitable starting from N=512, achieving ! near-linear speedup for N=4096 or higher. ! ! In order to use multicore features you have to: ! * use commercial version of ALGLIB ! * call this function with "smp_" prefix, which indicates that ! multicore code will be used (for multicore support) ! ! We recommend you to read 'Working with commercial version' section of ! ALGLIB Reference Manual in order to find out how to use performance- ! related features provided by commercial edition of ALGLIB. Input parameters: A - matrices L and Q in compact form. Output of RMatrixLQ subroutine. M - number of rows in given matrix A. M>=0. N - number of columns in given matrix A. N>=0. Tau - scalar factors which are used to form Q. Output of the RMatrixLQ subroutine. QRows - required number of rows in matrix Q. N>=QRows>=0. Output parameters: Q - first QRows rows of matrix Q. Array whose indexes range within [0..QRows-1, 0..N-1]. If QRows=0, the array remains unchanged.
-- ALGLIB routine -- 17.02.2010 Bochkanov Sergey *************************************************************************/ public static void rmatrixlqunpackq(double[,] a, int m, int n, double[] tau, int qrows, out double[,] q) { q = new double[0,0]; ortfac.rmatrixlqunpackq(a, m, n, tau, qrows, ref q); return; } public static void smp_rmatrixlqunpackq(double[,] a, int m, int n, double[] tau, int qrows, out double[,] q) { q = new double[0,0]; ortfac._pexec_rmatrixlqunpackq(a, m, n, tau, qrows, ref q); return; } /************************************************************************* Unpacking of matrix L from the LQ decomposition of a matrix A Input parameters: A - matrices Q and L in compact form. Output of RMatrixLQ subroutine. M - number of rows in given matrix A. M>=0. N - number of columns in given matrix A. N>=0. Output parameters: L - matrix L, array[0..M-1, 0..N-1]. -- ALGLIB routine -- 17.02.2010 Bochkanov Sergey *************************************************************************/ public static void rmatrixlqunpackl(double[,] a, int m, int n, out double[,] l) { l = new double[0,0]; ortfac.rmatrixlqunpackl(a, m, n, ref l); return; } /************************************************************************* Partial unpacking of matrix Q from QR decomposition of a complex matrix A. COMMERCIAL EDITION OF ALGLIB: ! Commercial version of ALGLIB includes two important improvements of ! this function, which can be used from C++ and C#: ! * Intel MKL support (lightweight Intel MKL is shipped with ALGLIB) ! * multicore support ! ! Intel MKL gives approximately constant (with respect to number of ! worker threads) acceleration factor which depends on CPU being used, ! problem size and "baseline" ALGLIB edition which is used for ! comparison. ! ! Say, on SSE2-capable CPU with N=1024, HPC ALGLIB will be: ! * about 2-3x faster than ALGLIB for C++ without MKL ! * about 7-10x faster than "pure C#" edition of ALGLIB ! 
Difference in performance will be more striking on newer CPU's with ! support for newer SIMD instructions. Generally, MKL accelerates any ! problem whose size is at least 128, with best efficiency achieved for ! N's larger than 512. ! ! Commercial edition of ALGLIB also supports multithreaded acceleration ! of this function. We should note that QR decomposition is harder to ! parallelize than, say, matrix-matrix product - this algorithm has ! many internal synchronization points which can not be avoided. However ! parallelism starts to be profitable starting from N=512, achieving ! near-linear speedup for N=4096 or higher. ! ! In order to use multicore features you have to: ! * use commercial version of ALGLIB ! * call this function with "smp_" prefix, which indicates that ! multicore code will be used (for multicore support) ! ! We recommend you to read 'Working with commercial version' section of ! ALGLIB Reference Manual in order to find out how to use performance- ! related features provided by commercial edition of ALGLIB. Input parameters: A - matrices Q and R in compact form. Output of CMatrixQR subroutine. M - number of rows in matrix A. M>=0. N - number of columns in matrix A. N>=0. Tau - scalar factors which are used to form Q. Output of CMatrixQR subroutine. QColumns - required number of columns in matrix Q. M>=QColumns>=0. Output parameters: Q - first QColumns columns of matrix Q. Array whose index ranges within [0..M-1, 0..QColumns-1]. If QColumns=0, array isn't changed.
-- ALGLIB routine -- 17.02.2010 Bochkanov Sergey *************************************************************************/ public static void cmatrixqrunpackq(complex[,] a, int m, int n, complex[] tau, int qcolumns, out complex[,] q) { q = new complex[0,0]; ortfac.cmatrixqrunpackq(a, m, n, tau, qcolumns, ref q); return; } public static void smp_cmatrixqrunpackq(complex[,] a, int m, int n, complex[] tau, int qcolumns, out complex[,] q) { q = new complex[0,0]; ortfac._pexec_cmatrixqrunpackq(a, m, n, tau, qcolumns, ref q); return; } /************************************************************************* Unpacking of matrix R from the QR decomposition of a matrix A Input parameters: A - matrices Q and R in compact form. Output of CMatrixQR subroutine. M - number of rows in given matrix A. M>=0. N - number of columns in given matrix A. N>=0. Output parameters: R - matrix R, array[0..M-1, 0..N-1]. -- ALGLIB routine -- 17.02.2010 Bochkanov Sergey *************************************************************************/ public static void cmatrixqrunpackr(complex[,] a, int m, int n, out complex[,] r) { r = new complex[0,0]; ortfac.cmatrixqrunpackr(a, m, n, ref r); return; } /************************************************************************* Partial unpacking of matrix Q from LQ decomposition of a complex matrix A. COMMERCIAL EDITION OF ALGLIB: ! Commercial version of ALGLIB includes two important improvements of ! this function, which can be used from C++ and C#: ! * Intel MKL support (lightweight Intel MKL is shipped with ALGLIB) ! * multicore support ! ! Intel MKL gives approximately constant (with respect to number of ! worker threads) acceleration factor which depends on CPU being used, ! problem size and "baseline" ALGLIB edition which is used for ! comparison. ! ! Say, on SSE2-capable CPU with N=1024, HPC ALGLIB will be: ! * about 2-3x faster than ALGLIB for C++ without MKL ! * about 7-10x faster than "pure C#" edition of ALGLIB ! 
Difference in performance will be more striking on newer CPU's with ! support for newer SIMD instructions. Generally, MKL accelerates any ! problem whose size is at least 128, with best efficiency achieved for ! N's larger than 512. ! ! Commercial edition of ALGLIB also supports multithreaded acceleration ! of this function. We should note that LQ decomposition is harder to ! parallelize than, say, matrix-matrix product - this algorithm has ! many internal synchronization points which can not be avoided. However ! parallelism starts to be profitable starting from N=512, achieving ! near-linear speedup for N=4096 or higher. ! ! In order to use multicore features you have to: ! * use commercial version of ALGLIB ! * call this function with "smp_" prefix, which indicates that ! multicore code will be used (for multicore support) ! ! We recommend you to read 'Working with commercial version' section of ! ALGLIB Reference Manual in order to find out how to use performance- ! related features provided by commercial edition of ALGLIB. Input parameters: A - matrices Q and L in compact form. Output of CMatrixLQ subroutine. M - number of rows in matrix A. M>=0. N - number of columns in matrix A. N>=0. Tau - scalar factors which are used to form Q. Output of CMatrixLQ subroutine. QRows - required number of rows in matrix Q. N>=QRows>=0. Output parameters: Q - first QRows rows of matrix Q. Array whose index ranges within [0..QRows-1, 0..N-1]. If QRows=0, array isn't changed.
-- ALGLIB routine -- 17.02.2010 Bochkanov Sergey *************************************************************************/ public static void cmatrixlqunpackq(complex[,] a, int m, int n, complex[] tau, int qrows, out complex[,] q) { q = new complex[0,0]; ortfac.cmatrixlqunpackq(a, m, n, tau, qrows, ref q); return; } public static void smp_cmatrixlqunpackq(complex[,] a, int m, int n, complex[] tau, int qrows, out complex[,] q) { q = new complex[0,0]; ortfac._pexec_cmatrixlqunpackq(a, m, n, tau, qrows, ref q); return; } /************************************************************************* Unpacking of matrix L from the LQ decomposition of a matrix A Input parameters: A - matrices Q and L in compact form. Output of CMatrixLQ subroutine. M - number of rows in given matrix A. M>=0. N - number of columns in given matrix A. N>=0. Output parameters: L - matrix L, array[0..M-1, 0..N-1]. -- ALGLIB routine -- 17.02.2010 Bochkanov Sergey *************************************************************************/ public static void cmatrixlqunpackl(complex[,] a, int m, int n, out complex[,] l) { l = new complex[0,0]; ortfac.cmatrixlqunpackl(a, m, n, ref l); return; } /************************************************************************* Reduction of a rectangular matrix to bidiagonal form The algorithm reduces the rectangular matrix A to bidiagonal form by orthogonal transformations P and Q: A = Q*B*(P^T). COMMERCIAL EDITION OF ALGLIB: ! Commercial version of ALGLIB includes one important improvement of ! this function, which can be used from C++ and C#: ! * Intel MKL support (lightweight Intel MKL is shipped with ALGLIB) ! ! Intel MKL gives approximately constant (with respect to number of ! worker threads) acceleration factor which depends on CPU being used, ! problem size and "baseline" ALGLIB edition which is used for ! comparison. ! ! Multithreaded acceleration is NOT supported for this function because ! bidiagonal decompostion is inherently sequential in nature. 
!
      ! We recommend you to read 'Working with commercial version' section of
      ! ALGLIB Reference Manual in order to find out how to use performance-
      ! related features provided by commercial edition of ALGLIB.

    Input parameters:
        A       -   source matrix. array[0..M-1, 0..N-1]
        M       -   number of rows in matrix A.
        N       -   number of columns in matrix A.

    Output parameters:
        A       -   matrices Q, B, P in compact form (see below).
        TauQ    -   scalar factors which are used to form matrix Q.
        TauP    -   scalar factors which are used to form matrix P.

    The main diagonal and one of the secondary diagonals of matrix A are
    replaced with bidiagonal matrix B. Other elements contain elementary
    reflections which form MxM matrix Q and NxN matrix P, respectively.

    If M>=N, B is the upper bidiagonal MxN matrix and is stored in the
    corresponding elements of matrix A. Matrix Q is represented as a
    product of elementary reflections Q = H(0)*H(1)*...*H(n-1), where
    H(i) = 1-tau*v*v'. Here tau is a scalar which is stored in TauQ[i], and
    vector v has the following structure: v(0:i-1)=0, v(i)=1, v(i+1:m-1) is
    stored in elements A(i+1:m-1,i). Matrix P is as follows: P =
    G(0)*G(1)*...*G(n-2), where G(i) = 1 - tau*u*u'. Tau is stored in TauP[i],
    u(0:i)=0, u(i+1)=1, u(i+2:n-1) is stored in elements A(i,i+2:n-1).

    If M<N, B is the lower bidiagonal MxN matrix and is stored in the
    corresponding elements of matrix A. Q = H(0)*H(1)*...*H(m-2), where
    H(i) = 1 - tau*v*v', tau is stored in TauQ, v(0:i)=0, v(i+1)=1, v(i+2:m-1)
    is stored in elements A(i+2:m-1,i). P = G(0)*G(1)*...*G(m-1),
    G(i) = 1-tau*u*u', tau is stored in TauP, u(0:i-1)=0, u(i)=1, u(i+1:n-1)
    is stored in A(i,i+1:n-1).

    EXAMPLE:

    m=6, n=5 (m > n):               m=5, n=6 (m < n):

    (  d   e   u1  u1  u1 )         (  d   u1  u1  u1  u1  u1 )
    (  v1  d   e   u2  u2 )         (  e   d   u2  u2  u2  u2 )
    (  v1  v2  d   e   u3 )         (  v1  e   d   u3  u3  u3 )
    (  v1  v2  v3  d   e  )         (  v1  v2  e   d   u4  u4 )
    (  v1  v2  v3  v4  d  )         (  v1  v2  v3  e   d   u5 )
    (  v1  v2  v3  v4  v5 )

    Here vi and ui are vectors which form H(i) and G(i), and d and e -
    are the diagonal and off-diagonal elements of matrix B.

      -- LAPACK routine (version 3.0) --
         Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
         Courant Institute, Argonne National Lab, and Rice University
         September 30, 1994.
         Sergey Bochkanov, ALGLIB project, translation from FORTRAN to
         pseudocode, 2007-2010.
*************************************************************************/
    public static void rmatrixbd(
        ref double[,] a, int m, int n,
        out double[] tauq,
        out double[] taup)
    {
        // Both out parameters must be assigned before they can be passed by ref.
        tauq = new double[0];
        taup = new double[0];
        ortfac.rmatrixbd(ref a, m, n, ref tauq, ref taup);
    }

    /*************************************************************************
    Unpacking matrix Q which reduces a matrix to bidiagonal form.

    COMMERCIAL EDITION OF ALGLIB:

      ! Commercial version of ALGLIB includes one important improvement of
      ! this function, which can be used from C++ and C#:
      ! * Intel MKL support (lightweight Intel MKL is shipped with ALGLIB)
      !
      ! Intel MKL gives approximately constant (with respect to number of
      ! worker threads) acceleration factor which depends on CPU being used,
      ! problem size and "baseline" ALGLIB edition which is used for
      ! comparison.
      !
      ! Multithreaded acceleration is NOT supported for this function.
      !
      ! We recommend you to read 'Working with commercial version' section of
      ! ALGLIB Reference Manual in order to find out how to use performance-
      ! related features provided by commercial edition of ALGLIB.

    Input parameters:
        QP          -   matrices Q and P in compact form.
                        Output of ToBidiagonal subroutine.
        M           -   number of rows in matrix A.
        N           -   number of columns in matrix A.
        TAUQ        -   scalar factors which are used to form Q.
                        Output of ToBidiagonal subroutine.
        QColumns    -   required number of columns in matrix Q.
                        M>=QColumns>=0.

    Output parameters:
        Q           -   first QColumns columns of matrix Q.
                        Array[0..M-1, 0..QColumns-1]
                        If QColumns=0, the array is not modified.
-- ALGLIB -- 2005-2010 Bochkanov Sergey *************************************************************************/ public static void rmatrixbdunpackq(double[,] qp, int m, int n, double[] tauq, int qcolumns, out double[,] q) { q = new double[0,0]; ortfac.rmatrixbdunpackq(qp, m, n, tauq, qcolumns, ref q); return; } /************************************************************************* Multiplication by matrix Q which reduces matrix A to bidiagonal form. The algorithm allows pre- or post-multiply by Q or Q'. COMMERCIAL EDITION OF ALGLIB: ! Commercial version of ALGLIB includes one important improvement of ! this function, which can be used from C++ and C#: ! * Intel MKL support (lightweight Intel MKL is shipped with ALGLIB) ! ! Intel MKL gives approximately constant (with respect to number of ! worker threads) acceleration factor which depends on CPU being used, ! problem size and "baseline" ALGLIB edition which is used for ! comparison. ! ! Multithreaded acceleration is NOT supported for this function. ! ! We recommend you to read 'Working with commercial version' section of ! ALGLIB Reference Manual in order to find out how to use performance- ! related features provided by commercial edition of ALGLIB. Input parameters: QP - matrices Q and P in compact form. Output of ToBidiagonal subroutine. M - number of rows in matrix A. N - number of columns in matrix A. TAUQ - scalar factors which are used to form Q. Output of ToBidiagonal subroutine. Z - multiplied matrix. array[0..ZRows-1,0..ZColumns-1] ZRows - number of rows in matrix Z. If FromTheRight=False, ZRows=M, otherwise ZRows can be arbitrary. ZColumns - number of columns in matrix Z. If FromTheRight=True, ZColumns=M, otherwise ZColumns can be arbitrary. FromTheRight - pre- or post-multiply. DoTranspose - multiply by Q or Q'. Output parameters: Z - product of Z and Q. Array[0..ZRows-1,0..ZColumns-1] If ZRows=0 or ZColumns=0, the array is not modified. 
-- ALGLIB -- 2005-2010 Bochkanov Sergey *************************************************************************/ public static void rmatrixbdmultiplybyq(double[,] qp, int m, int n, double[] tauq, ref double[,] z, int zrows, int zcolumns, bool fromtheright, bool dotranspose) { ortfac.rmatrixbdmultiplybyq(qp, m, n, tauq, ref z, zrows, zcolumns, fromtheright, dotranspose); return; } /************************************************************************* Unpacking matrix P which reduces matrix A to bidiagonal form. The subroutine returns transposed matrix P. Input parameters: QP - matrices Q and P in compact form. Output of ToBidiagonal subroutine. M - number of rows in matrix A. N - number of columns in matrix A. TAUP - scalar factors which are used to form P. Output of ToBidiagonal subroutine. PTRows - required number of rows of matrix P^T. N >= PTRows >= 0. Output parameters: PT - first PTRows columns of matrix P^T Array[0..PTRows-1, 0..N-1] If PTRows=0, the array is not modified. -- ALGLIB -- 2005-2010 Bochkanov Sergey *************************************************************************/ public static void rmatrixbdunpackpt(double[,] qp, int m, int n, double[] taup, int ptrows, out double[,] pt) { pt = new double[0,0]; ortfac.rmatrixbdunpackpt(qp, m, n, taup, ptrows, ref pt); return; } /************************************************************************* Multiplication by matrix P which reduces matrix A to bidiagonal form. The algorithm allows pre- or post-multiply by P or P'. Input parameters: QP - matrices Q and P in compact form. Output of RMatrixBD subroutine. M - number of rows in matrix A. N - number of columns in matrix A. TAUP - scalar factors which are used to form P. Output of RMatrixBD subroutine. Z - multiplied matrix. Array whose indexes range within [0..ZRows-1,0..ZColumns-1]. ZRows - number of rows in matrix Z. If FromTheRight=False, ZRows=N, otherwise ZRows can be arbitrary. ZColumns - number of columns in matrix Z. 
If FromTheRight=True, ZColumns=N, otherwise ZColumns can be arbitrary. FromTheRight - pre- or post-multiply. DoTranspose - multiply by P or P'. Output parameters: Z - product of Z and P. Array whose indexes range within [0..ZRows-1,0..ZColumns-1]. If ZRows=0 or ZColumns=0, the array is not modified. -- ALGLIB -- 2005-2010 Bochkanov Sergey *************************************************************************/
    public static void rmatrixbdmultiplybyp(double[,] qp, int m, int n, double[] taup, ref double[,] z, int zrows, int zcolumns, bool fromtheright, bool dotranspose)
    {
        // Public wrapper: every argument is passed straight through to ortfac.
        ortfac.rmatrixbdmultiplybyp(
            qp, m, n, taup,
            ref z, zrows, zcolumns,
            fromtheright, dotranspose);
    }

    /*************************************************************************
    Unpacking of the main and secondary diagonals of bidiagonal decomposition
    of matrix A.

    Input parameters:
        B   -   output of RMatrixBD subroutine.
        M   -   number of rows in matrix B.
        N   -   number of columns in matrix B.

    Output parameters:
        IsUpper -   True, if the matrix is upper bidiagonal.
                    otherwise IsUpper is False.
        D       -   the main diagonal.
                    Array whose index ranges within [0..Min(M,N)-1].
        E       -   the secondary diagonal (upper or lower, depending on
                    the value of IsUpper).
                    Array index ranges within [0..Min(M,N)-1], the last
                    element is not used.

      -- ALGLIB --
         2005-2010
         Bochkanov Sergey
    *************************************************************************/
    public static void rmatrixbdunpackdiagonals(double[,] b, int m, int n, out bool isupper, out double[] d, out double[] e)
    {
        // All 'out' arguments get placeholder values first so that they can be
        // handed to the core routine by reference.
        isupper = false;
        d = new double[0];
        e = new double[0];
        ortfac.rmatrixbdunpackdiagonals(b, m, n, ref isupper, ref d, ref e);
    }

    /************************************************************************* Reduction of a square matrix to upper Hessenberg form: Q'*A*Q = H, where Q is an orthogonal matrix, H - Hessenberg matrix. COMMERCIAL EDITION OF ALGLIB: ! Commercial version of ALGLIB includes one important improvement of !
this function, which can be used from C++ and C#: ! * Intel MKL support (lightweight Intel MKL is shipped with ALGLIB) ! ! Intel MKL gives approximately constant (with respect to number of ! worker threads) acceleration factor which depends on CPU being used, ! problem size and "baseline" ALGLIB edition which is used for ! comparison. ! ! Generally, commercial ALGLIB is several times faster than open-source ! generic C edition, and many times faster than open-source C# edition. ! ! Multithreaded acceleration is NOT supported for this function. ! ! We recommend you to read 'Working with commercial version' section of ! ALGLIB Reference Manual in order to find out how to use performance- ! related features provided by commercial edition of ALGLIB. Input parameters: A - matrix A with elements [0..N-1, 0..N-1] N - size of matrix A. Output parameters: A - matrices H and Q in compact form (see below). Tau - array of scalar factors which are used to form matrix Q. Array whose index ranges within [0..N-2] Matrix H is located on the main diagonal, on the lower secondary diagonal and above the main diagonal of matrix A. The elements which are used to form matrix Q are situated in array Tau and below the lower secondary diagonal of matrix A as follows: Matrix Q is represented as a product of elementary reflections Q = H(0)*H(1)*...*H(n-2), where each H(i) is given by H(i) = 1 - tau * v * (v^T) where tau is a scalar stored in Tau[I]; v - is a real vector, so that v(0:i) = 0, v(i+1) = 1, v(i+2:n-1) stored in A(i+2:n-1,i). -- LAPACK routine (version 3.0) -- Univ. of Tennessee, Univ.
of California Berkeley, NAG Ltd., Courant Institute, Argonne National Lab, and Rice University October 31, 1992 *************************************************************************/
    public static void rmatrixhessenberg(ref double[,] a, int n, out double[] tau)
    {
        // 'out' argument must be assigned before it can be forwarded by reference.
        tau = new double[0];
        ortfac.rmatrixhessenberg(ref a, n, ref tau);
    }

    /************************************************************************* Unpacking matrix Q which reduces matrix A to upper Hessenberg form COMMERCIAL EDITION OF ALGLIB: ! Commercial version of ALGLIB includes one important improvement of ! this function, which can be used from C++ and C#: ! * Intel MKL support (lightweight Intel MKL is shipped with ALGLIB) ! ! Intel MKL gives approximately constant (with respect to number of ! worker threads) acceleration factor which depends on CPU being used, ! problem size and "baseline" ALGLIB edition which is used for ! comparison. ! ! Generally, commercial ALGLIB is several times faster than open-source ! generic C edition, and many times faster than open-source C# edition. ! ! Multithreaded acceleration is NOT supported for this function. ! ! We recommend you to read 'Working with commercial version' section of ! ALGLIB Reference Manual in order to find out how to use performance- ! related features provided by commercial edition of ALGLIB. Input parameters: A - output of RMatrixHessenberg subroutine. N - size of matrix A. Tau - scalar factors which are used to form Q. Output of RMatrixHessenberg subroutine. Output parameters: Q - matrix Q. Array whose indexes range within [0..N-1, 0..N-1].
-- ALGLIB -- 2005-2010 Bochkanov Sergey *************************************************************************/
    public static void rmatrixhessenbergunpackq(double[,] a, int n, double[] tau, out double[,] q)
    {
        // 'out' argument must be assigned before it can be forwarded by reference.
        q = new double[0, 0];
        ortfac.rmatrixhessenbergunpackq(a, n, tau, ref q);
    }

    /*************************************************************************
    Unpacking matrix H (the result of matrix A reduction to upper Hessenberg form)

    Input parameters:
        A   -   output of RMatrixHessenberg subroutine.
        N   -   size of matrix A.

    Output parameters:
        H   -   matrix H. Array whose indexes range within [0..N-1, 0..N-1].

      -- ALGLIB --
         2005-2010
         Bochkanov Sergey
    *************************************************************************/
    public static void rmatrixhessenbergunpackh(double[,] a, int n, out double[,] h)
    {
        // 'out' argument must be assigned before it can be forwarded by reference.
        h = new double[0, 0];
        ortfac.rmatrixhessenbergunpackh(a, n, ref h);
    }

    /************************************************************************* Reduction of a symmetric matrix which is given by its higher or lower triangular part to a tridiagonal matrix using orthogonal similarity transformation: Q'*A*Q=T. COMMERCIAL EDITION OF ALGLIB: ! Commercial version of ALGLIB includes one important improvement of ! this function, which can be used from C++ and C#: ! * Intel MKL support (lightweight Intel MKL is shipped with ALGLIB) ! ! Intel MKL gives approximately constant (with respect to number of ! worker threads) acceleration factor which depends on CPU being used, ! problem size and "baseline" ALGLIB edition which is used for ! comparison. ! ! Generally, commercial ALGLIB is several times faster than open-source ! generic C edition, and many times faster than open-source C# edition. ! ! Multithreaded acceleration is NOT supported for this function. ! ! We recommend you to read 'Working with commercial version' section of ! ALGLIB Reference Manual in order to find out how to use performance- ! related features provided by commercial edition of ALGLIB.
Input parameters: A - matrix to be transformed array with elements [0..N-1, 0..N-1]. N - size of matrix A. IsUpper - storage format. If IsUpper = True, then matrix A is given by its upper triangle, and the lower triangle is not used and not modified by the algorithm, and vice versa if IsUpper = False. Output parameters: A - matrices T and Q in compact form (see below) Tau - array of factors which are forming matrices H(i) array with elements [0..N-2]. D - main diagonal of symmetric matrix T. array with elements [0..N-1]. E - secondary diagonal of symmetric matrix T. array with elements [0..N-2]. If IsUpper=True, the matrix Q is represented as a product of elementary reflectors Q = H(n-2) . . . H(1) H(0). Each H(i) has the form H(i) = I - tau * v * v' where tau is a real scalar, and v is a real vector with v(i+1:n-1) = 0, v(i) = 1, v(0:i-1) is stored on exit in A(0:i-1,i+1), and tau in TAU(i). If IsUpper=False, the matrix Q is represented as a product of elementary reflectors Q = H(0) H(1) . . . H(n-2). Each H(i) has the form H(i) = I - tau * v * v' where tau is a real scalar, and v is a real vector with v(0:i) = 0, v(i+1) = 1, v(i+2:n-1) is stored on exit in A(i+2:n-1,i), and tau in TAU(i). The contents of A on exit are illustrated by the following examples with n = 5: if UPLO = 'U': if UPLO = 'L': ( d e v1 v2 v3 ) ( d ) ( d e v2 v3 ) ( e d ) ( d e v3 ) ( v0 e d ) ( d e ) ( v0 v1 e d ) ( d ) ( v0 v1 v2 e d ) where d and e denote diagonal and off-diagonal elements of T, and vi denotes an element of the vector defining H(i). -- LAPACK routine (version 3.0) -- Univ. of Tennessee, Univ.
of California Berkeley, NAG Ltd., Courant Institute, Argonne National Lab, and Rice University October 31, 1992 *************************************************************************/
    public static void smatrixtd(ref double[,] a, int n, bool isupper, out double[] tau, out double[] d, out double[] e)
    {
        // All 'out' arguments get placeholder values first so that they can be
        // handed to the core routine by reference.
        tau = new double[0];
        d = new double[0];
        e = new double[0];
        ortfac.smatrixtd(ref a, n, isupper, ref tau, ref d, ref e);
    }

    /************************************************************************* Unpacking matrix Q which reduces symmetric matrix to a tridiagonal form. COMMERCIAL EDITION OF ALGLIB: ! Commercial version of ALGLIB includes one important improvement of ! this function, which can be used from C++ and C#: ! * Intel MKL support (lightweight Intel MKL is shipped with ALGLIB) ! ! Intel MKL gives approximately constant (with respect to number of ! worker threads) acceleration factor which depends on CPU being used, ! problem size and "baseline" ALGLIB edition which is used for ! comparison. ! ! Generally, commercial ALGLIB is several times faster than open-source ! generic C edition, and many times faster than open-source C# edition. ! ! Multithreaded acceleration is NOT supported for this function. ! ! We recommend you to read 'Working with commercial version' section of ! ALGLIB Reference Manual in order to find out how to use performance- ! related features provided by commercial edition of ALGLIB. Input parameters: A - the result of a SMatrixTD subroutine N - size of matrix A. IsUpper - storage format (a parameter of SMatrixTD subroutine) Tau - the result of a SMatrixTD subroutine Output parameters: Q - transformation matrix. array with elements [0..N-1, 0..N-1].
-- ALGLIB -- Copyright 2005-2010 by Bochkanov Sergey *************************************************************************/
    public static void smatrixtdunpackq(double[,] a, int n, bool isupper, double[] tau, out double[,] q)
    {
        // 'out' argument must be assigned before it can be forwarded by reference.
        q = new double[0, 0];
        ortfac.smatrixtdunpackq(a, n, isupper, tau, ref q);
    }

    /************************************************************************* Reduction of a Hermitian matrix which is given by its higher or lower triangular part to a real tridiagonal matrix using unitary similarity transformation: Q'*A*Q = T. COMMERCIAL EDITION OF ALGLIB: ! Commercial version of ALGLIB includes one important improvement of ! this function, which can be used from C++ and C#: ! * Intel MKL support (lightweight Intel MKL is shipped with ALGLIB) ! ! Intel MKL gives approximately constant (with respect to number of ! worker threads) acceleration factor which depends on CPU being used, ! problem size and "baseline" ALGLIB edition which is used for ! comparison. ! ! Generally, commercial ALGLIB is several times faster than open-source ! generic C edition, and many times faster than open-source C# edition. ! ! Multithreaded acceleration is NOT supported for this function. ! ! We recommend you to read 'Working with commercial version' section of ! ALGLIB Reference Manual in order to find out how to use performance- ! related features provided by commercial edition of ALGLIB. Input parameters: A - matrix to be transformed array with elements [0..N-1, 0..N-1]. N - size of matrix A. IsUpper - storage format. If IsUpper = True, then matrix A is given by its upper triangle, and the lower triangle is not used and not modified by the algorithm, and vice versa if IsUpper = False. Output parameters: A - matrices T and Q in compact form (see below) Tau - array of factors which are forming matrices H(i) array with elements [0..N-2]. D - main diagonal of real symmetric matrix T. array with elements [0..N-1]. E - secondary diagonal of real symmetric matrix T.
array with elements [0..N-2]. If IsUpper=True, the matrix Q is represented as a product of elementary reflectors Q = H(n-2) . . . H(1) H(0). Each H(i) has the form H(i) = I - tau * v * v' where tau is a complex scalar, and v is a complex vector with v(i+1:n-1) = 0, v(i) = 1, v(0:i-1) is stored on exit in A(0:i-1,i+1), and tau in TAU(i). If IsUpper=False, the matrix Q is represented as a product of elementary reflectors Q = H(0) H(1) . . . H(n-2). Each H(i) has the form H(i) = I - tau * v * v' where tau is a complex scalar, and v is a complex vector with v(0:i) = 0, v(i+1) = 1, v(i+2:n-1) is stored on exit in A(i+2:n-1,i), and tau in TAU(i). The contents of A on exit are illustrated by the following examples with n = 5: if UPLO = 'U': if UPLO = 'L': ( d e v1 v2 v3 ) ( d ) ( d e v2 v3 ) ( e d ) ( d e v3 ) ( v0 e d ) ( d e ) ( v0 v1 e d ) ( d ) ( v0 v1 v2 e d ) where d and e denote diagonal and off-diagonal elements of T, and vi denotes an element of the vector defining H(i). -- LAPACK routine (version 3.0) -- Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., Courant Institute, Argonne National Lab, and Rice University October 31, 1992 *************************************************************************/
    public static void hmatrixtd(ref complex[,] a, int n, bool isupper, out complex[] tau, out double[] d, out double[] e)
    {
        // All 'out' arguments get placeholder values first so that they can be
        // handed to the core routine by reference.
        tau = new complex[0];
        d = new double[0];
        e = new double[0];
        ortfac.hmatrixtd(ref a, n, isupper, ref tau, ref d, ref e);
    }

    /************************************************************************* Unpacking matrix Q which reduces a Hermitian matrix to a real tridiagonal form. COMMERCIAL EDITION OF ALGLIB: ! Commercial version of ALGLIB includes one important improvement of ! this function, which can be used from C++ and C#: ! * Intel MKL support (lightweight Intel MKL is shipped with ALGLIB) ! ! Intel MKL gives approximately constant (with respect to number of !
worker threads) acceleration factor which depends on CPU being used, ! problem size and "baseline" ALGLIB edition which is used for ! comparison. ! ! Generally, commercial ALGLIB is several times faster than open-source ! generic C edition, and many times faster than open-source C# edition. ! ! Multithreaded acceleration is NOT supported for this function. ! ! We recommend you to read 'Working with commercial version' section of ! ALGLIB Reference Manual in order to find out how to use performance- ! related features provided by commercial edition of ALGLIB. Input parameters: A - the result of a HMatrixTD subroutine N - size of matrix A. IsUpper - storage format (a parameter of HMatrixTD subroutine) Tau - the result of a HMatrixTD subroutine Output parameters: Q - transformation matrix. array with elements [0..N-1, 0..N-1]. -- ALGLIB -- Copyright 2005-2010 by Bochkanov Sergey *************************************************************************/
    public static void hmatrixtdunpackq(complex[,] a, int n, bool isupper, complex[] tau, out complex[,] q)
    {
        // 'out' argument must be assigned before it can be forwarded by reference.
        q = new complex[0, 0];
        ortfac.hmatrixtdunpackq(a, n, isupper, tau, ref q);
    }

}
public partial class alglib
{

    /************************************************************************* Singular value decomposition of a bidiagonal matrix (extended algorithm) COMMERCIAL EDITION OF ALGLIB: ! Commercial version of ALGLIB includes one important improvement of ! this function, which can be used from C++ and C#: ! * Intel MKL support (lightweight Intel MKL is shipped with ALGLIB) ! ! Intel MKL gives approximately constant (with respect to number of ! worker threads) acceleration factor which depends on CPU being used, ! problem size and "baseline" ALGLIB edition which is used for ! comparison. ! ! Generally, commercial ALGLIB is several times faster than open-source ! generic C edition, and many times faster than open-source C# edition. ! ! Multithreaded acceleration is NOT supported for this function. ! !
We recommend you to read 'Working with commercial version' section of ! ALGLIB Reference Manual in order to find out how to use performance- ! related features provided by commercial edition of ALGLIB. The algorithm performs the singular value decomposition of a bidiagonal matrix B (upper or lower) representing it as B = Q*S*P^T, where Q and P - orthogonal matrices, S - diagonal matrix with non-negative elements on the main diagonal, in descending order. The algorithm finds singular values. In addition, the algorithm can calculate matrices Q and P (more precisely, not the matrices, but their product with given matrices U and VT - U*Q and (P^T)*VT)). Of course, matrices U and VT can be of any type, including identity. Furthermore, the algorithm can calculate Q'*C (this product is calculated more effectively than U*Q, because this calculation operates with rows instead of matrix columns). The feature of the algorithm is its ability to find all singular values including those which are arbitrarily close to 0 with relative accuracy close to machine precision. If the parameter IsFractionalAccuracyRequired is set to True, all singular values will have high relative accuracy close to machine precision. If the parameter is set to False, only the biggest singular value will have relative accuracy close to machine precision. The absolute error of other singular values is equal to the absolute error of the biggest singular value. Input parameters: D - main diagonal of matrix B. Array whose index ranges within [0..N-1]. E - superdiagonal (or subdiagonal) of matrix B. Array whose index ranges within [0..N-2]. N - size of matrix B. IsUpper - True, if the matrix is upper bidiagonal. IsFractionalAccuracyRequired - THIS PARAMETER IS IGNORED SINCE ALGLIB 3.5.0 SINGULAR VALUES ARE ALWAYS SEARCHED WITH HIGH ACCURACY. U - matrix to be multiplied by Q. Array whose indexes range within [0..NRU-1, 0..N-1]. 
The matrix can be bigger, in that case only the submatrix [0..NRU-1, 0..N-1] will be multiplied by Q. NRU - number of rows in matrix U. C - matrix to be multiplied by Q'. Array whose indexes range within [0..N-1, 0..NCC-1]. The matrix can be bigger, in that case only the submatrix [0..N-1, 0..NCC-1] will be multiplied by Q'. NCC - number of columns in matrix C. VT - matrix to be multiplied by P^T. Array whose indexes range within [0..N-1, 0..NCVT-1]. The matrix can be bigger, in that case only the submatrix [0..N-1, 0..NCVT-1] will be multiplied by P^T. NCVT - number of columns in matrix VT. Output parameters: D - singular values of matrix B in descending order. U - if NRU>0, contains matrix U*Q. VT - if NCVT>0, contains matrix (P^T)*VT. C - if NCC>0, contains matrix Q'*C. Result: True, if the algorithm has converged. False, if the algorithm hasn't converged (rare case). NOTE: multiplication U*Q is performed by means of transposition to internal buffer, multiplication and backward transposition. It helps to avoid costly columnwise operations and speed-up algorithm. Additional information: The type of convergence is controlled by the internal parameter TOL. If the parameter is greater than 0, the singular values will have relative accuracy TOL. If TOL<0, the singular values will have absolute accuracy ABS(TOL)*norm(B). By default, |TOL| falls within the range of 10*Epsilon and 100*Epsilon, where Epsilon is the machine precision. It is not recommended to use TOL less than 10*Epsilon since this will considerably slow down the algorithm and may not lead to error decreasing. History: * 31 March, 2007. changed MAXITR from 6 to 12. -- LAPACK routine (version 3.0) -- Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., Courant Institute, Argonne National Lab, and Rice University October 31, 1999. 
*************************************************************************/
    public static bool rmatrixbdsvd(ref double[] d, double[] e, int n, bool isupper, bool isfractionalaccuracyrequired, ref double[,] u, int nru, ref double[,] c, int ncc, ref double[,] vt, int ncvt)
    {
        // Public wrapper: the flag returned by the core routine is passed back unchanged.
        return bdsvd.rmatrixbdsvd(
            ref d, e, n, isupper, isfractionalaccuracyrequired,
            ref u, nru,
            ref c, ncc,
            ref vt, ncvt);
    }

}
public partial class alglib
{

    /************************************************************************* Singular value decomposition of a rectangular matrix. COMMERCIAL EDITION OF ALGLIB: ! Commercial version of ALGLIB includes one important improvement of ! this function, which can be used from C++ and C#: ! * Intel MKL support (lightweight Intel MKL is shipped with ALGLIB) ! ! Intel MKL gives approximately constant (with respect to number of ! worker threads) acceleration factor which depends on CPU being used, ! problem size and "baseline" ALGLIB edition which is used for ! comparison. ! ! Generally, commercial ALGLIB is several times faster than open-source ! generic C edition, and many times faster than open-source C# edition. ! ! Multithreaded acceleration is only partially supported (some parts are ! optimized, but most - are not). ! ! We recommend you to read 'Working with commercial version' section of ! ALGLIB Reference Manual in order to find out how to use performance- ! related features provided by commercial edition of ALGLIB. The algorithm calculates the singular value decomposition of a matrix of size MxN: A = U * S * V^T The algorithm finds the singular values and, optionally, matrices U and V^T. The algorithm can find both first min(M,N) columns of matrix U and rows of matrix V^T (singular vectors), and matrices U and V^T wholly (of sizes MxM and NxN respectively). Take into account that the subroutine does not return matrix V but V^T. Input parameters: A - matrix to be decomposed. Array whose indexes range within [0..M-1, 0..N-1].
M - number of rows in matrix A. N - number of columns in matrix A. UNeeded - 0, 1 or 2. See the description of the parameter U. VTNeeded - 0, 1 or 2. See the description of the parameter VT. AdditionalMemory - If the parameter: * equals 0, the algorithm doesn’t use additional memory (lower requirements, lower performance). * equals 1, the algorithm uses additional memory of size min(M,N)*min(M,N) of real numbers. It often speeds up the algorithm. * equals 2, the algorithm uses additional memory of size M*min(M,N) of real numbers. It allows to get a maximum performance. The recommended value of the parameter is 2. Output parameters: W - contains singular values in descending order. U - if UNeeded=0, U isn't changed, the left singular vectors are not calculated. if Uneeded=1, U contains left singular vectors (first min(M,N) columns of matrix U). Array whose indexes range within [0..M-1, 0..Min(M,N)-1]. if UNeeded=2, U contains matrix U wholly. Array whose indexes range within [0..M-1, 0..M-1]. VT - if VTNeeded=0, VT isn’t changed, the right singular vectors are not calculated. if VTNeeded=1, VT contains right singular vectors (first min(M,N) rows of matrix V^T). Array whose indexes range within [0..min(M,N)-1, 0..N-1]. if VTNeeded=2, VT contains matrix V^T wholly. Array whose indexes range within [0..N-1, 0..N-1]. 
-- ALGLIB -- Copyright 2005 by Bochkanov Sergey *************************************************************************/
    public static bool rmatrixsvd(double[,] a, int m, int n, int uneeded, int vtneeded, int additionalmemory, out double[] w, out double[,] u, out double[,] vt)
    {
        // All 'out' arguments get placeholder values first so that they can be
        // handed to the core routine by reference.
        w = new double[0];
        u = new double[0, 0];
        vt = new double[0, 0];
        return svd.rmatrixsvd(a, m, n, uneeded, vtneeded, additionalmemory, ref w, ref u, ref vt);
    }

    // Same signature and same 'out' handling as rmatrixsvd, but forwards to
    // svd._pexec_rmatrixsvd instead of svd.rmatrixsvd.
    public static bool smp_rmatrixsvd(double[,] a, int m, int n, int uneeded, int vtneeded, int additionalmemory, out double[] w, out double[,] u, out double[,] vt)
    {
        w = new double[0];
        u = new double[0, 0];
        vt = new double[0, 0];
        return svd._pexec_rmatrixsvd(a, m, n, uneeded, vtneeded, additionalmemory, ref w, ref u, ref vt);
    }

}
public partial class alglib
{

    /************************************************************************* Finding the eigenvalues and eigenvectors of a symmetric matrix The algorithm finds eigen pairs of a symmetric matrix by reducing it to tridiagonal form and using the QL/QR algorithm. COMMERCIAL EDITION OF ALGLIB: ! Commercial version of ALGLIB includes one important improvement of ! this function, which can be used from C++ and C#: ! * Intel MKL support (lightweight Intel MKL is shipped with ALGLIB) ! ! Intel MKL gives approximately constant (with respect to number of ! worker threads) acceleration factor which depends on CPU being used, ! problem size and "baseline" ALGLIB edition which is used for ! comparison. ! ! Generally, commercial ALGLIB is several times faster than open-source ! generic C edition, and many times faster than open-source C# edition. ! ! Multithreaded acceleration is NOT supported for this function. ! ! We recommend you to read 'Working with commercial version' section of ! ALGLIB Reference Manual in order to find out how to use performance- ! related features provided by commercial edition of ALGLIB.
Input parameters: A - symmetric matrix which is given by its upper or lower triangular part. Array whose indexes range within [0..N-1, 0..N-1]. N - size of matrix A. ZNeeded - flag controlling whether the eigenvectors are needed or not. If ZNeeded is equal to: * 0, the eigenvectors are not returned; * 1, the eigenvectors are returned. IsUpper - storage format. Output parameters: D - eigenvalues in ascending order. Array whose index ranges within [0..N-1]. Z - if ZNeeded is equal to: * 0, Z hasn’t changed; * 1, Z contains the eigenvectors. Array whose indexes range within [0..N-1, 0..N-1]. The eigenvectors are stored in the matrix columns. Result: True, if the algorithm has converged. False, if the algorithm hasn't converged (rare case). -- ALGLIB -- Copyright 2005-2008 by Bochkanov Sergey *************************************************************************/
    public static bool smatrixevd(double[,] a, int n, int zneeded, bool isupper, out double[] d, out double[,] z)
    {
        // 'out' arguments get placeholder values first so that they can be
        // handed to the core routine by reference.
        d = new double[0];
        z = new double[0, 0];
        return evd.smatrixevd(a, n, zneeded, isupper, ref d, ref z);
    }

    /************************************************************************* Subroutine for finding the eigenvalues (and eigenvectors) of a symmetric matrix in a given half open interval (B1, B2] by using a bisection and inverse iteration Input parameters: A - symmetric matrix which is given by its upper or lower triangular part. Array [0..N-1, 0..N-1]. N - size of matrix A. ZNeeded - flag controlling whether the eigenvectors are needed or not. If ZNeeded is equal to: * 0, the eigenvectors are not returned; * 1, the eigenvectors are returned. IsUpper - storage format of matrix A. B1, B2 - half open interval (B1, B2] to search eigenvalues in. Output parameters: M - number of eigenvalues found in a given half-interval (M>=0). W - array of the eigenvalues found. Array whose index ranges within [0..M-1].
Z - if ZNeeded is equal to: * 0, Z hasn’t changed; * 1, Z contains eigenvectors. Array whose indexes range within [0..N-1, 0..M-1]. The eigenvectors are stored in the matrix columns. Result: True, if successful. M contains the number of eigenvalues in the given half-interval (could be equal to 0), W contains the eigenvalues, Z contains the eigenvectors (if needed). False, if the bisection method subroutine wasn't able to find the eigenvalues in the given interval or if the inverse iteration subroutine wasn't able to find all the corresponding eigenvectors. In that case, the eigenvalues and eigenvectors are not returned, M is equal to 0. -- ALGLIB -- Copyright 07.01.2006 by Bochkanov Sergey *************************************************************************/
    public static bool smatrixevdr(double[,] a, int n, int zneeded, bool isupper, double b1, double b2, out int m, out double[] w, out double[,] z)
    {
        // All 'out' arguments get placeholder values first so that they can be
        // handed to the core routine by reference.
        m = 0;
        w = new double[0];
        z = new double[0, 0];
        return evd.smatrixevdr(a, n, zneeded, isupper, b1, b2, ref m, ref w, ref z);
    }

    /************************************************************************* Subroutine for finding the eigenvalues and eigenvectors of a symmetric matrix with given indexes by using bisection and inverse iteration methods. Input parameters: A - symmetric matrix which is given by its upper or lower triangular part. Array whose indexes range within [0..N-1, 0..N-1]. N - size of matrix A. ZNeeded - flag controlling whether the eigenvectors are needed or not. If ZNeeded is equal to: * 0, the eigenvectors are not returned; * 1, the eigenvectors are returned. IsUpper - storage format of matrix A. I1, I2 - index interval for searching (from I1 to I2). 0 <= I1 <= I2 <= N-1. Output parameters: W - array of the eigenvalues found. Array whose index ranges within [0..I2-I1]. Z - if ZNeeded is equal to: * 0, Z hasn’t changed; * 1, Z contains eigenvectors. Array whose indexes range within [0..N-1, 0..I2-I1].
In that case, the eigenvectors are stored in the matrix columns. Result: True, if successful. W contains the eigenvalues, Z contains the eigenvectors (if needed). False, if the bisection method subroutine wasn't able to find the eigenvalues in the given interval or if the inverse iteration subroutine wasn't able to find all the corresponding eigenvectors. In that case, the eigenvalues and eigenvectors are not returned. -- ALGLIB -- Copyright 07.01.2006 by Bochkanov Sergey *************************************************************************/
    public static bool smatrixevdi(double[,] a, int n, int zneeded, bool isupper, int i1, int i2, out double[] w, out double[,] z)
    {
        // 'out' arguments get placeholder values first so that they can be
        // handed to the core routine by reference.
        w = new double[0];
        z = new double[0, 0];
        return evd.smatrixevdi(a, n, zneeded, isupper, i1, i2, ref w, ref z);
    }

    /************************************************************************* Finding the eigenvalues and eigenvectors of a Hermitian matrix The algorithm finds eigen pairs of a Hermitian matrix by reducing it to real tridiagonal form and using the QL/QR algorithm. COMMERCIAL EDITION OF ALGLIB: ! Commercial version of ALGLIB includes one important improvement of ! this function, which can be used from C++ and C#: ! * Intel MKL support (lightweight Intel MKL is shipped with ALGLIB) ! ! Intel MKL gives approximately constant (with respect to number of ! worker threads) acceleration factor which depends on CPU being used, ! problem size and "baseline" ALGLIB edition which is used for ! comparison. ! ! Generally, commercial ALGLIB is several times faster than open-source ! generic C edition, and many times faster than open-source C# edition. ! ! Multithreaded acceleration is NOT supported for this function. ! ! We recommend you to read 'Working with commercial version' section of ! ALGLIB Reference Manual in order to find out how to use performance- ! related features provided by commercial edition of ALGLIB.
Input parameters: A - Hermitian matrix which is given by its upper or lower triangular part. Array whose indexes range within [0..N-1, 0..N-1]. N - size of matrix A. IsUpper - storage format. ZNeeded - flag controlling whether the eigenvectors are needed or not. If ZNeeded is equal to: * 0, the eigenvectors are not returned; * 1, the eigenvectors are returned. Output parameters: D - eigenvalues in ascending order. Array whose index ranges within [0..N-1]. Z - if ZNeeded is equal to: * 0, Z hasn’t changed; * 1, Z contains the eigenvectors. Array whose indexes range within [0..N-1, 0..N-1]. The eigenvectors are stored in the matrix columns. Result: True, if the algorithm has converged. False, if the algorithm hasn't converged (rare case). Note: eigenvectors of Hermitian matrix are defined up to multiplication by a complex number L, such that |L|=1. -- ALGLIB -- Copyright 2005, 23 March 2007 by Bochkanov Sergey *************************************************************************/
    public static bool hmatrixevd(complex[,] a, int n, int zneeded, bool isupper, out double[] d, out complex[,] z)
    {
        // 'out' arguments get placeholder values first so that they can be
        // handed to the core routine by reference.
        d = new double[0];
        z = new complex[0, 0];
        return evd.hmatrixevd(a, n, zneeded, isupper, ref d, ref z);
    }

    /************************************************************************* Subroutine for finding the eigenvalues (and eigenvectors) of a Hermitian matrix in a given half-interval (B1, B2] by using a bisection and inverse iteration Input parameters: A - Hermitian matrix which is given by its upper or lower triangular part. Array whose indexes range within [0..N-1, 0..N-1]. N - size of matrix A. ZNeeded - flag controlling whether the eigenvectors are needed or not. If ZNeeded is equal to: * 0, the eigenvectors are not returned; * 1, the eigenvectors are returned. IsUpper - storage format of matrix A. B1, B2 - half-interval (B1, B2] to search eigenvalues in.
Output parameters:
        M       -   number of eigenvalues found in a given half-interval, M>=0
        W       -   array of the eigenvalues found.
                    Array whose index ranges within [0..M-1].
        Z       -   if ZNeeded is equal to:
                     * 0, Z hasn't changed;
                     * 1, Z contains eigenvectors.
                    Array whose indexes range within [0..N-1, 0..M-1].
                    The eigenvectors are stored in the matrix columns.

    Result:
        True, if successful. M contains the number of eigenvalues in the
        given half-interval (could be equal to 0), W contains the
        eigenvalues, Z contains the eigenvectors (if needed).

        False, if the bisection method subroutine wasn't able to find the
        eigenvalues in the given interval or if the inverse iteration
        subroutine wasn't able to find all the corresponding eigenvectors.
        In that case, the eigenvalues and eigenvectors are not returned,
        M is equal to 0.

    Note:
        eigen vectors of Hermitian matrix are defined up to multiplication by
        a complex number L, such that |L|=1.

      -- ALGLIB --
         Copyright 07.01.2006, 24.03.2007 by Bochkanov Sergey.
    *************************************************************************/
    public static bool hmatrixevdr(complex[,] a, int n, int zneeded, bool isupper, double b1, double b2, out int m, out double[] w, out complex[,] z)
    {
        // Out arguments need a definite value before they can be handed on by ref.
        m = 0;
        w = new double[0];
        z = new complex[0,0];
        return evd.hmatrixevdr(a, n, zneeded, isupper, b1, b2, ref m, ref w, ref z);
    }

    /*************************************************************************
    Subroutine for finding the eigenvalues and eigenvectors of a Hermitian
    matrix with given indexes by using bisection and inverse iteration methods

    Input parameters:
        A       -   Hermitian matrix which is given by its upper or lower
                    triangular part.
                    Array whose indexes range within [0..N-1, 0..N-1].
        N       -   size of matrix A.
        ZNeeded -   flag controlling whether the eigenvectors are needed or
                    not. If ZNeeded is equal to:
                     * 0, the eigenvectors are not returned;
                     * 1, the eigenvectors are returned.
        IsUpper -   storage format of matrix A.
I1, I2 -    index interval for searching (from I1 to I2).
                    0 <= I1 <= I2 <= N-1.

    Output parameters:
        W       -   array of the eigenvalues found.
                    Array whose index ranges within [0..I2-I1].
        Z       -   if ZNeeded is equal to:
                     * 0, Z hasn't changed;
                     * 1, Z contains eigenvectors.
                    Array whose indexes range within [0..N-1, 0..I2-I1].
                    In that case, the eigenvectors are stored in the matrix
                    columns.

    Result:
        True, if successful. W contains the eigenvalues, Z contains the
        eigenvectors (if needed).

        False, if the bisection method subroutine wasn't able to find the
        eigenvalues in the given interval or if the inverse iteration
        subroutine wasn't able to find all the corresponding eigenvectors.
        In that case, the eigenvalues and eigenvectors are not returned.

    Note:
        eigen vectors of Hermitian matrix are defined up to multiplication by
        a complex number L, such that |L|=1.

      -- ALGLIB --
         Copyright 07.01.2006, 24.03.2007 by Bochkanov Sergey.
    *************************************************************************/
    public static bool hmatrixevdi(complex[,] a, int n, int zneeded, bool isupper, int i1, int i2, out double[] w, out complex[,] z)
    {
        // Out arguments need a definite value before they can be handed on by ref.
        w = new double[0];
        z = new complex[0,0];
        return evd.hmatrixevdi(a, n, zneeded, isupper, i1, i2, ref w, ref z);
    }

    /*************************************************************************
    Finding the eigenvalues and eigenvectors of a tridiagonal symmetric matrix

    The algorithm finds the eigen pairs of a tridiagonal symmetric matrix by
    using a QL/QR algorithm with implicit shifts.

    COMMERCIAL EDITION OF ALGLIB:

      ! Commercial version of ALGLIB includes one important improvement of
      ! this function, which can be used from C++ and C#:
      ! * Intel MKL support (lightweight Intel MKL is shipped with ALGLIB)
      !
      ! Intel MKL gives approximately constant (with respect to number of
      ! worker threads) acceleration factor which depends on CPU being used,
      ! problem size and "baseline" ALGLIB edition which is used for
      ! comparison.
      !
      !
Generally, commercial ALGLIB is several times faster than open-source ! generic C edition, and many times faster than open-source C# edition. ! ! Multithreaded acceleration is NOT supported for this function. ! ! We recommend you to read 'Working with commercial version' section of ! ALGLIB Reference Manual in order to find out how to use performance- ! related features provided by commercial edition of ALGLIB. Input parameters: D - the main diagonal of a tridiagonal matrix. Array whose index ranges within [0..N-1]. E - the secondary diagonal of a tridiagonal matrix. Array whose index ranges within [0..N-2]. N - size of matrix A. ZNeeded - flag controlling whether the eigenvectors are needed or not. If ZNeeded is equal to: * 0, the eigenvectors are not needed; * 1, the eigenvectors of a tridiagonal matrix are multiplied by the square matrix Z. It is used if the tridiagonal matrix is obtained by the similarity transformation of a symmetric matrix; * 2, the eigenvectors of a tridiagonal matrix replace the square matrix Z; * 3, matrix Z contains the first row of the eigenvectors matrix. Z - if ZNeeded=1, Z contains the square matrix by which the eigenvectors are multiplied. Array whose indexes range within [0..N-1, 0..N-1]. Output parameters: D - eigenvalues in ascending order. Array whose index ranges within [0..N-1]. Z - if ZNeeded is equal to: * 0, Z hasn't changed; * 1, Z contains the product of a given matrix (from the left) and the eigenvectors matrix (from the right); * 2, Z contains the eigenvectors. * 3, Z contains the first row of the eigenvectors matrix. If ZNeeded<3, Z is the array whose indexes range within [0..N-1, 0..N-1]. In that case, the eigenvectors are stored in the matrix columns. If ZNeeded=3, Z is the array whose indexes range within [0..0, 0..N-1]. Result: True, if the algorithm has converged. False, if the algorithm hasn't converged. -- LAPACK routine (version 3.0) -- Univ. of Tennessee, Univ.
of California Berkeley, NAG Ltd.,
         Courant Institute, Argonne National Lab, and Rice University
         September 30, 1994
    *************************************************************************/
    public static bool smatrixtdevd(ref double[] d, double[] e, int n, int zneeded, ref double[,] z)
    {
        // Plain pass-through: D and Z are in/out arrays owned by the caller.
        return evd.smatrixtdevd(ref d, e, n, zneeded, ref z);
    }

    /*************************************************************************
    Subroutine for finding the tridiagonal matrix eigenvalues/vectors in a
    given half-interval (A, B] by using bisection and inverse iteration.

    Input parameters:
        D       -   the main diagonal of a tridiagonal matrix.
                    Array whose index ranges within [0..N-1].
        E       -   the secondary diagonal of a tridiagonal matrix.
                    Array whose index ranges within [0..N-2].
        N       -   size of matrix, N>=0.
        ZNeeded -   flag controlling whether the eigenvectors are needed or
                    not. If ZNeeded is equal to:
                     * 0, the eigenvectors are not needed;
                     * 1, the eigenvectors of a tridiagonal matrix are
                       multiplied by the square matrix Z. It is used if the
                       tridiagonal matrix is obtained by the similarity
                       transformation of a symmetric matrix.
                     * 2, the eigenvectors of a tridiagonal matrix replace
                       matrix Z.
        A, B    -   half-interval (A, B] to search eigenvalues in.
        Z       -   if ZNeeded is equal to:
                     * 0, Z isn't used and remains unchanged;
                     * 1, Z contains the square matrix (array whose indexes
                       range within [0..N-1, 0..N-1]) which reduces the given
                       symmetric matrix to tridiagonal form;
                     * 2, Z isn't used (but changed on the exit).

    Output parameters:
        D       -   array of the eigenvalues found.
                    Array whose index ranges within [0..M-1].
        M       -   number of eigenvalues found in the given half-interval
                    (M>=0).
        Z       -   if ZNeeded is equal to:
                     * 0, doesn't contain any information;
                     * 1, contains the product of a given NxN matrix Z (from
                       the left) and NxM matrix of the eigenvectors found
                       (from the right). Array whose indexes range within
                       [0..N-1, 0..M-1].
                     * 2, contains the matrix of the eigenvectors found.
Array whose indexes range within [0..N-1, 0..M-1]. Result: True, if successful. In that case, M contains the number of eigenvalues in the given half-interval (could be equal to 0), D contains the eigenvalues, Z contains the eigenvectors (if needed). It should be noted that the subroutine changes the size of arrays D and Z. False, if the bisection method subroutine wasn't able to find the eigenvalues in the given interval or if the inverse iteration subroutine wasn't able to find all the corresponding eigenvectors. In that case, the eigenvalues and eigenvectors are not returned, M is equal to 0. -- ALGLIB -- Copyright 31.03.2008 by Bochkanov Sergey *************************************************************************/ public static bool smatrixtdevdr(ref double[] d, double[] e, int n, int zneeded, double a, double b, out int m, ref double[,] z) { m = 0; bool result = evd.smatrixtdevdr(ref d, e, n, zneeded, a, b, ref m, ref z); return result; } /************************************************************************* Subroutine for finding tridiagonal matrix eigenvalues/vectors with given indexes (in ascending order) by using the bisection and inverse iteraion. Input parameters: D - the main diagonal of a tridiagonal matrix. Array whose index ranges within [0..N-1]. E - the secondary diagonal of a tridiagonal matrix. Array whose index ranges within [0..N-2]. N - size of matrix. N>=0. ZNeeded - flag controlling whether the eigenvectors are needed or not. If ZNeeded is equal to: * 0, the eigenvectors are not needed; * 1, the eigenvectors of a tridiagonal matrix are multiplied by the square matrix Z. It is used if the tridiagonal matrix is obtained by the similarity transformation of a symmetric matrix. * 2, the eigenvectors of a tridiagonal matrix replace matrix Z. I1, I2 - index interval for searching (from I1 to I2). 0 <= I1 <= I2 <= N-1. 
Z - if ZNeeded is equal to: * 0, Z isn't used and remains unchanged; * 1, Z contains the square matrix (array whose indexes range within [0..N-1, 0..N-1]) which reduces the given symmetric matrix to tridiagonal form; * 2, Z isn't used (but changed on the exit). Output parameters: D - array of the eigenvalues found. Array whose index ranges within [0..I2-I1]. Z - if ZNeeded is equal to: * 0, doesn't contain any information; * 1, contains the product of a given NxN matrix Z (from the left) and Nx(I2-I1) matrix of the eigenvectors found (from the right). Array whose indexes range within [0..N-1, 0..I2-I1]. * 2, contains the matrix of the eigenvalues found. Array whose indexes range within [0..N-1, 0..I2-I1]. Result: True, if successful. In that case, D contains the eigenvalues, Z contains the eigenvectors (if needed). It should be noted that the subroutine changes the size of arrays D and Z. False, if the bisection method subroutine wasn't able to find the eigenvalues in the given interval or if the inverse iteration subroutine wasn't able to find all the corresponding eigenvectors. In that case, the eigenvalues and eigenvectors are not returned. -- ALGLIB -- Copyright 25.12.2005 by Bochkanov Sergey *************************************************************************/ public static bool smatrixtdevdi(ref double[] d, double[] e, int n, int zneeded, int i1, int i2, ref double[,] z) { bool result = evd.smatrixtdevdi(ref d, e, n, zneeded, i1, i2, ref z); return result; } /************************************************************************* Finding eigenvalues and eigenvectors of a general (unsymmetric) matrix COMMERCIAL EDITION OF ALGLIB: ! Commercial version of ALGLIB includes one important improvement of ! this function, which can be used from C++ and C#: ! * Intel MKL support (lightweight Intel MKL is shipped with ALGLIB) ! ! Intel MKL gives approximately constant (with respect to number of ! 
worker threads) acceleration factor which depends on CPU being used, ! problem size and "baseline" ALGLIB edition which is used for ! comparison. Speed-up provided by MKL for this particular problem (EVD) ! is really high, because MKL uses combination of (a) better low-level ! optimizations, and (b) better EVD algorithms. ! ! On one particular SSE-capable machine for N=1024, commercial MKL- ! -capable ALGLIB was: ! * 7-10 times faster than open source "generic C" version ! * 15-18 times faster than "pure C#" version ! ! Multithreaded acceleration is NOT supported for this function. ! ! We recommend you to read 'Working with commercial version' section of ! ALGLIB Reference Manual in order to find out how to use performance- ! related features provided by commercial edition of ALGLIB. The algorithm finds eigenvalues and eigenvectors of a general matrix by using the QR algorithm with multiple shifts. The algorithm can find eigenvalues and both left and right eigenvectors. The right eigenvector is a vector x such that A*x = w*x, and the left eigenvector is a vector y such that y'*A = w*y' (here y' implies a complex conjugate transposition of vector y). Input parameters: A - matrix. Array whose indexes range within [0..N-1, 0..N-1]. N - size of matrix A. VNeeded - flag controlling whether eigenvectors are needed or not. If VNeeded is equal to: * 0, eigenvectors are not returned; * 1, right eigenvectors are returned; * 2, left eigenvectors are returned; * 3, both left and right eigenvectors are returned. Output parameters: WR - real parts of eigenvalues. Array whose index ranges within [0..N-1]. WI - imaginary parts of eigenvalues. Array whose index ranges within [0..N-1]. VL, VR - arrays of left and right eigenvectors (if they are needed). If WI[i]=0, the respective eigenvalue is a real number, and it corresponds to the column number I of matrices VL/VR.
If WI[i]>0, we have a pair of complex conjugate numbers with positive and negative imaginary parts: the first eigenvalue WR[i] + sqrt(-1)*WI[i]; the second eigenvalue WR[i+1] + sqrt(-1)*WI[i+1]; WI[i]>0 WI[i+1] = -WI[i] < 0 In that case, the eigenvector corresponding to the first eigenvalue is located in i and i+1 columns of matrices VL/VR (the column number i contains the real part, and the column number i+1 contains the imaginary part), and the vector corresponding to the second eigenvalue is a complex conjugate to the first vector. Arrays whose indexes range within [0..N-1, 0..N-1]. Result: True, if the algorithm has converged. False, if the algorithm has not converged. Note 1: Some users may ask the following question: what if WI[N-1]>0? WI[N] must contain an eigenvalue which is complex conjugate to the N-th eigenvalue, but the array has only size N? The answer is as follows: such a situation cannot occur because the algorithm finds eigenvalues in pairs, therefore, if WI[i]>0, I is strictly less than N-1. Note 2: The algorithm performance depends on the value of the internal parameter NS of the InternalSchurDecomposition subroutine which defines the number of shifts in the QR algorithm (similarly to the block width in block-matrix algorithms of linear algebra). If you require maximum performance on your machine, it is recommended to adjust this parameter manually. See also the InternalTREVC subroutine. The algorithm is based on the LAPACK 3.0 library.
*************************************************************************/
    public static bool rmatrixevd(double[,] a, int n, int vneeded, out double[] wr, out double[] wi, out double[,] vl, out double[,] vr)
    {
        // Out arguments need a definite value before they can be handed on by ref.
        wr = new double[0];
        wi = new double[0];
        vl = new double[0,0];
        vr = new double[0,0];
        return evd.rmatrixevd(a, n, vneeded, ref wr, ref wi, ref vl, ref vr);
    }

}
public partial class alglib
{

    /*************************************************************************
    Generation of a random uniformly distributed (Haar) orthogonal matrix

    INPUT PARAMETERS:
        N   -   matrix size, N>=1

    OUTPUT PARAMETERS:
        A   -   orthogonal NxN matrix, array[0..N-1,0..N-1]

    NOTE: this function uses algorithm described in Stewart, G. W. (1980),
          "The Efficient Generation of Random Orthogonal Matrices with an
          Application to Condition Estimators".

          Speaking short, to generate an (N+1)x(N+1) orthogonal matrix, it:
          * takes an NxN one
          * takes uniformly distributed unit vector of dimension N+1.
          * constructs a Householder reflection from the vector, then applies
            it to the smaller matrix (embedded in the larger size with a 1 at
            the bottom right corner).

-- ALGLIB routine -- 04.12.2009 Bochkanov Sergey *************************************************************************/ public static void rmatrixrndorthogonal(int n, out double[,] a) { a = new double[0,0]; matgen.rmatrixrndorthogonal(n, ref a); return; } /************************************************************************* Generation of random NxN matrix with given condition number and norm2(A)=1 INPUT PARAMETERS: N - matrix size C - condition number (in 2-norm) OUTPUT PARAMETERS: A - random matrix with norm2(A)=1 and cond(A)=C -- ALGLIB routine -- 04.12.2009 Bochkanov Sergey *************************************************************************/ public static void rmatrixrndcond(int n, double c, out double[,] a) { a = new double[0,0]; matgen.rmatrixrndcond(n, c, ref a); return; } /************************************************************************* Generation of a random Haar distributed orthogonal complex matrix INPUT PARAMETERS: N - matrix size, N>=1 OUTPUT PARAMETERS: A - orthogonal NxN matrix, array[0..N-1,0..N-1] NOTE: this function uses algorithm described in Stewart, G. W. (1980), "The Efficient Generation of Random Orthogonal Matrices with an Application to Condition Estimators". Speaking short, to generate an (N+1)x(N+1) orthogonal matrix, it: * takes an NxN one * takes uniformly distributed unit vector of dimension N+1. * constructs a Householder reflection from the vector, then applies it to the smaller matrix (embedded in the larger size with a 1 at the bottom right corner). 
-- ALGLIB routine -- 04.12.2009 Bochkanov Sergey *************************************************************************/ public static void cmatrixrndorthogonal(int n, out complex[,] a) { a = new complex[0,0]; matgen.cmatrixrndorthogonal(n, ref a); return; } /************************************************************************* Generation of random NxN complex matrix with given condition number C and norm2(A)=1 INPUT PARAMETERS: N - matrix size C - condition number (in 2-norm) OUTPUT PARAMETERS: A - random matrix with norm2(A)=1 and cond(A)=C -- ALGLIB routine -- 04.12.2009 Bochkanov Sergey *************************************************************************/ public static void cmatrixrndcond(int n, double c, out complex[,] a) { a = new complex[0,0]; matgen.cmatrixrndcond(n, c, ref a); return; } /************************************************************************* Generation of random NxN symmetric matrix with given condition number and norm2(A)=1 INPUT PARAMETERS: N - matrix size C - condition number (in 2-norm) OUTPUT PARAMETERS: A - random matrix with norm2(A)=1 and cond(A)=C -- ALGLIB routine -- 04.12.2009 Bochkanov Sergey *************************************************************************/ public static void smatrixrndcond(int n, double c, out double[,] a) { a = new double[0,0]; matgen.smatrixrndcond(n, c, ref a); return; } /************************************************************************* Generation of random NxN symmetric positive definite matrix with given condition number and norm2(A)=1 INPUT PARAMETERS: N - matrix size C - condition number (in 2-norm) OUTPUT PARAMETERS: A - random SPD matrix with norm2(A)=1 and cond(A)=C -- ALGLIB routine -- 04.12.2009 Bochkanov Sergey *************************************************************************/ public static void spdmatrixrndcond(int n, double c, out double[,] a) { a = new double[0,0]; matgen.spdmatrixrndcond(n, c, ref a); return; } 
/*************************************************************************
    Generation of random NxN Hermitian matrix with given condition number and
    norm2(A)=1

    INPUT PARAMETERS:
        N   -   matrix size
        C   -   condition number (in 2-norm)

    OUTPUT PARAMETERS:
        A   -   random matrix with norm2(A)=1 and cond(A)=C

      -- ALGLIB routine --
         04.12.2009
         Bochkanov Sergey
    *************************************************************************/
    public static void hmatrixrndcond(int n, double c, out complex[,] a)
    {
        // Start from an empty matrix; the generator receives it by ref.
        a = new complex[0,0];
        matgen.hmatrixrndcond(n, c, ref a);
    }

    /*************************************************************************
    Generation of random NxN Hermitian positive definite matrix with given
    condition number and norm2(A)=1

    INPUT PARAMETERS:
        N   -   matrix size
        C   -   condition number (in 2-norm)

    OUTPUT PARAMETERS:
        A   -   random HPD matrix with norm2(A)=1 and cond(A)=C

      -- ALGLIB routine --
         04.12.2009
         Bochkanov Sergey
    *************************************************************************/
    public static void hpdmatrixrndcond(int n, double c, out complex[,] a)
    {
        // Start from an empty matrix; the generator receives it by ref.
        a = new complex[0,0];
        matgen.hpdmatrixrndcond(n, c, ref a);
    }

    /*************************************************************************
    Multiplication of MxN matrix by NxN random Haar distributed orthogonal
    matrix

    INPUT PARAMETERS:
        A   -   matrix, array[0..M-1, 0..N-1]
        M, N-   matrix size

    OUTPUT PARAMETERS:
        A   -   A*Q, where Q is random NxN orthogonal matrix

      -- ALGLIB routine --
         04.12.2009
         Bochkanov Sergey
    *************************************************************************/
    public static void rmatrixrndorthogonalfromtheright(ref double[,] a, int m, int n)
    {
        // The caller's matrix is forwarded by ref and updated by the generator.
        matgen.rmatrixrndorthogonalfromtheright(ref a, m, n);
    }

    /*************************************************************************
    Multiplication of MxN matrix by MxM random Haar distributed orthogonal
    matrix

    INPUT PARAMETERS:
        A   -   matrix, array[0..M-1, 0..N-1]
        M, N-   matrix size

    OUTPUT PARAMETERS:
        A   -   Q*A, where Q is random MxM orthogonal matrix

      -- ALGLIB routine --
         04.12.2009
         Bochkanov Sergey
    *************************************************************************/
    public static void rmatrixrndorthogonalfromtheleft(ref double[,] a, int m, int n)
    {
        // The caller's matrix is forwarded by ref and updated by the generator.
        matgen.rmatrixrndorthogonalfromtheleft(ref a, m, n);
    }

    /*************************************************************************
    Multiplication of MxN complex matrix by NxN random Haar distributed
    complex orthogonal matrix

    INPUT PARAMETERS:
        A   -   matrix, array[0..M-1, 0..N-1]
        M, N-   matrix size

    OUTPUT PARAMETERS:
        A   -   A*Q, where Q is random NxN orthogonal matrix

      -- ALGLIB routine --
         04.12.2009
         Bochkanov Sergey
    *************************************************************************/
    public static void cmatrixrndorthogonalfromtheright(ref complex[,] a, int m, int n)
    {
        // The caller's matrix is forwarded by ref and updated by the generator.
        matgen.cmatrixrndorthogonalfromtheright(ref a, m, n);
    }

    /*************************************************************************
    Multiplication of MxN complex matrix by MxM random Haar distributed
    complex orthogonal matrix

    INPUT PARAMETERS:
        A   -   matrix, array[0..M-1, 0..N-1]
        M, N-   matrix size

    OUTPUT PARAMETERS:
        A   -   Q*A, where Q is random MxM orthogonal matrix

      -- ALGLIB routine --
         04.12.2009
         Bochkanov Sergey
    *************************************************************************/
    public static void cmatrixrndorthogonalfromtheleft(ref complex[,] a, int m, int n)
    {
        // The caller's matrix is forwarded by ref and updated by the generator.
        matgen.cmatrixrndorthogonalfromtheleft(ref a, m, n);
    }

    /*************************************************************************
    Symmetric multiplication of NxN matrix by random Haar distributed
    orthogonal matrix

    INPUT PARAMETERS:
        A   -   matrix, array[0..N-1, 0..N-1]
        N   -   matrix size

    OUTPUT PARAMETERS:
        A   -   Q'*A*Q, where Q is random NxN orthogonal matrix

      -- ALGLIB routine --
         04.12.2009
         Bochkanov Sergey
    *************************************************************************/
    public static void smatrixrndmultiply(ref double[,] a, int n)
    {
        // The caller's matrix is forwarded by ref and updated by the generator.
        matgen.smatrixrndmultiply(ref a, n);
    }
/*************************************************************************
    Hermitian multiplication of NxN matrix by random Haar distributed
    complex orthogonal matrix

    INPUT PARAMETERS:
        A   -   matrix, array[0..N-1, 0..N-1]
        N   -   matrix size

    OUTPUT PARAMETERS:
        A   -   Q^H*A*Q, where Q is random NxN orthogonal matrix

      -- ALGLIB routine --
         04.12.2009
         Bochkanov Sergey
    *************************************************************************/
    public static void hmatrixrndmultiply(ref complex[,] a, int n)
    {
        // The caller's matrix is forwarded by ref and updated by the generator.
        matgen.hmatrixrndmultiply(ref a, n);
    }

}
public partial class alglib
{

    /*************************************************************************
    Sparse matrix structure.

    You should use ALGLIB functions to work with sparse matrix. Never try to
    access its fields directly!

    NOTES ON THE SPARSE STORAGE FORMATS

    Sparse matrices can be stored using several formats:
    * Hash-Table representation
    * Compressed Row Storage (CRS)
    * Skyline matrix storage (SKS)

    Each of the formats has benefits and drawbacks:
    * Hash-table is good for dynamic operations (insertion of new elements),
      but does not support linear algebra operations
    * CRS is good for operations like matrix-vector or matrix-matrix products,
      but its initialization is less convenient - you have to tell row sizes
      at the initialization, and you have to fill matrix only row by row,
      from left to right.
    * SKS is a special format which is used to store triangular factors from
      Cholesky factorization. It does not support dynamic modification, and
      support for linear algebra operations is very limited.

Tables below outline information about these three formats:

        OPERATIONS WITH MATRIX      HASH        CRS         SKS
        creation                    +           +           +
        SparseGet                   +           +           +
        SparseRewriteExisting       +           +           +
        SparseSet                   +
        SparseAdd                   +
        SparseGetRow                            +           +
        SparseGetCompressedRow                  +           +
        sparse-dense linear algebra             +           +
    *************************************************************************/
    public class sparsematrix : alglibobject
    {
        //
        // Although some of declarations below are public, you should not use them
        // They are intended for internal use only
        //
        private sparse.sparsematrix _innerobj;

        public sparse.sparsematrix innerobj
        {
            get { return _innerobj; }
        }

        // Wraps an already existing computational-core object without copying it.
        public sparsematrix(sparse.sparsematrix obj)
        {
            _innerobj = obj;
        }

        //
        // Public declarations
        //

        // Creates a wrapper around a freshly constructed core object.
        public sparsematrix() : this(new sparse.sparsematrix())
        {
        }

        // Returns a new wrapper around a copy of the core object.
        public override alglib.alglibobject make_copy()
        {
            sparse.sparsematrix duplicate = (sparse.sparsematrix)_innerobj.make_copy();
            return new sparsematrix(duplicate);
        }
    }

    /*************************************************************************
    Temporary buffers for sparse matrix operations.

    You should pass an instance of this structure to factorization functions.
    It allows to reuse memory during repeated sparse factorizations. You do
    not have to call some initialization function - simply passing an
    instance to factorization function is enough.
*************************************************************************/
    public class sparsebuffers : alglibobject
    {
        //
        // Although some of declarations below are public, you should not use them
        // They are intended for internal use only
        //
        private sparse.sparsebuffers _innerobj;

        public sparse.sparsebuffers innerobj
        {
            get { return _innerobj; }
        }

        // Wraps an already existing computational-core object without copying it.
        public sparsebuffers(sparse.sparsebuffers obj)
        {
            _innerobj = obj;
        }

        //
        // Public declarations
        //

        // Creates a wrapper around a freshly constructed core object.
        public sparsebuffers() : this(new sparse.sparsebuffers())
        {
        }

        // Returns a new wrapper around a copy of the core object.
        public override alglib.alglibobject make_copy()
        {
            sparse.sparsebuffers duplicate = (sparse.sparsebuffers)_innerobj.make_copy();
            return new sparsebuffers(duplicate);
        }
    }

    /*************************************************************************
    This function creates sparse matrix in a Hash-Table format.

    This function creates Hash-Table matrix, which can be converted to CRS
    format after its initialization is over. Typical usage scenario for a
    sparse matrix is:
    1. creation in a Hash-Table format
    2. insertion of the matrix elements
    3. conversion to the CRS representation
    4. matrix is passed to some linear algebra algorithm

    Some information about different matrix formats can be found below, in
    the "NOTES" section.

    INPUT PARAMETERS
        M           -   number of rows in a matrix, M>=1
        N           -   number of columns in a matrix, N>=1
        K           -   K>=0, expected number of non-zero elements in a matrix.
                        K can be inexact approximation, can be less than actual
                        number of elements (table will grow when needed) or
                        even zero.
                        It is important to understand that although hash-table
                        may grow automatically, it is better to provide good
                        estimate of data size.

    OUTPUT PARAMETERS
        S           -   sparse M*N matrix in Hash-Table representation.
                        All elements of the matrix are zero.

    NOTE 1

    Hash-tables use memory inefficiently, and they have to keep some amount
    of the "spare memory" in order to have good performance. Hash table for
    matrix with K non-zero elements will need C*K*(8+2*sizeof(int)) bytes,
    where C is a small constant, about 1.5-2 in magnitude.

CRS storage, from the other side, is more memory-efficient, and needs
    just K*(8+sizeof(int))+M*sizeof(int) bytes, where M is a number of rows
    in a matrix.

    When you convert from the Hash-Table to CRS representation, all unneeded
    memory will be freed.

    NOTE 2

    Comments of SparseMatrix structure outline information about different
    sparse storage formats. We recommend you to read them before starting to
    use ALGLIB sparse matrices.

    NOTE 3

    This function completely overwrites S with new sparse matrix. Previously
    allocated storage is NOT reused. If you want to reuse already allocated
    memory, call SparseCreateBuf function.

      -- ALGLIB PROJECT --
         Copyright 14.10.2011 by Bochkanov Sergey
    *************************************************************************/
    public static void sparsecreate(int m, int n, int k, out sparsematrix s)
    {
        // A brand-new wrapper is always allocated; its core object is then initialised.
        s = new sparsematrix();
        sparse.sparsecreate(m, n, k, s.innerobj);
    }

    // Overload without K: passes 0 as the expected number of non-zero elements.
    public static void sparsecreate(int m, int n, out sparsematrix s)
    {
        s = new sparsematrix();
        sparse.sparsecreate(m, n, 0, s.innerobj);
    }

    /*************************************************************************
    This version of SparseCreate function creates sparse matrix in Hash-Table
    format, reusing previously allocated storage as much as possible. Read
    comments for SparseCreate() for more information.

    INPUT PARAMETERS
        M           -   number of rows in a matrix, M>=1
        N           -   number of columns in a matrix, N>=1
        K           -   K>=0, expected number of non-zero elements in a matrix.
                        K can be inexact approximation, can be less than actual
                        number of elements (table will grow when needed) or
                        even zero.
                        It is important to understand that although hash-table
                        may grow automatically, it is better to provide good
                        estimate of data size.
        S           -   SparseMatrix structure which MAY contain some already
                        allocated storage.

    OUTPUT PARAMETERS
        S           -   sparse M*N matrix in Hash-Table representation.
                        All elements of the matrix are zero.
Previously allocated storage is reused, if its size is compatible with expected number of non-zeros K. -- ALGLIB PROJECT -- Copyright 14.01.2014 by Bochkanov Sergey *************************************************************************/ public static void sparsecreatebuf(int m, int n, int k, sparsematrix s) { sparse.sparsecreatebuf(m, n, k, s.innerobj); return; } public static void sparsecreatebuf(int m, int n, sparsematrix s) { int k; k = 0; sparse.sparsecreatebuf(m, n, k, s.innerobj); return; } /************************************************************************* This function creates sparse matrix in a CRS format (expert function for situations when you are running out of memory). This function creates CRS matrix. Typical usage scenario for a CRS matrix is: 1. creation (you have to tell number of non-zero elements at each row at this moment) 2. insertion of the matrix elements (row by row, from left to right) 3. matrix is passed to some linear algebra algorithm This function is a memory-efficient alternative to SparseCreate(), but it is more complex because it requires you to know in advance how large your matrix is. Some information about different matrix formats can be found in comments on SparseMatrix structure. We recommend you to read them before starting to use ALGLIB sparse matrices.. INPUT PARAMETERS M - number of rows in a matrix, M>=1 N - number of columns in a matrix, N>=1 NER - number of elements at each row, array[M], NER[I]>=0 OUTPUT PARAMETERS S - sparse M*N matrix in CRS representation. You have to fill ALL non-zero elements by calling SparseSet() BEFORE you try to use this matrix. NOTE: this function completely overwrites S with new sparse matrix. Previously allocated storage is NOT reused. If you want to reuse already allocated memory, call SparseCreateCRSBuf function. 
-- ALGLIB PROJECT -- Copyright 14.10.2011 by Bochkanov Sergey *************************************************************************/ public static void sparsecreatecrs(int m, int n, int[] ner, out sparsematrix s) { s = new sparsematrix(); sparse.sparsecreatecrs(m, n, ner, s.innerobj); return; } /************************************************************************* This function creates sparse matrix in a CRS format (expert function for situations when you are running out of memory). This version of CRS matrix creation function may reuse memory already allocated in S. This function creates CRS matrix. Typical usage scenario for a CRS matrix is: 1. creation (you have to tell number of non-zero elements at each row at this moment) 2. insertion of the matrix elements (row by row, from left to right) 3. matrix is passed to some linear algebra algorithm This function is a memory-efficient alternative to SparseCreate(), but it is more complex because it requires you to know in advance how large your matrix is. Some information about different matrix formats can be found in comments on SparseMatrix structure. We recommend you to read them before starting to use ALGLIB sparse matrices.. INPUT PARAMETERS M - number of rows in a matrix, M>=1 N - number of columns in a matrix, N>=1 NER - number of elements at each row, array[M], NER[I]>=0 S - sparse matrix structure with possibly preallocated memory. OUTPUT PARAMETERS S - sparse M*N matrix in CRS representation. You have to fill ALL non-zero elements by calling SparseSet() BEFORE you try to use this matrix. 
-- ALGLIB PROJECT -- Copyright 14.10.2011 by Bochkanov Sergey *************************************************************************/ public static void sparsecreatecrsbuf(int m, int n, int[] ner, sparsematrix s) { sparse.sparsecreatecrsbuf(m, n, ner, s.innerobj); return; } /************************************************************************* This function creates sparse matrix in a SKS format (skyline storage format). In most cases you do not need this function - CRS format better suits most use cases. INPUT PARAMETERS M, N - number of rows(M) and columns (N) in a matrix: * M=N (as for now, ALGLIB supports only square SKS) * N>=1 * M>=1 D - "bottom" bandwidths, array[M], D[I]>=0. I-th element stores number of non-zeros at I-th row, below the diagonal (diagonal itself is not included) U - "top" bandwidths, array[N], U[I]>=0. I-th element stores number of non-zeros at I-th row, above the diagonal (diagonal itself is not included) OUTPUT PARAMETERS S - sparse M*N matrix in SKS representation. All elements are filled by zeros. You may use SparseRewriteExisting() to change their values. NOTE: this function completely overwrites S with new sparse matrix. Previously allocated storage is NOT reused. If you want to reuse already allocated memory, call SparseCreateSKSBuf function. -- ALGLIB PROJECT -- Copyright 13.01.2014 by Bochkanov Sergey *************************************************************************/ public static void sparsecreatesks(int m, int n, int[] d, int[] u, out sparsematrix s) { s = new sparsematrix(); sparse.sparsecreatesks(m, n, d, u, s.innerobj); return; } /************************************************************************* This is "buffered" version of SparseCreateSKS() which reuses memory previously allocated in S (of course, memory is reallocated if needed). This function creates sparse matrix in a SKS format (skyline storage format). In most cases you do not need this function - CRS format better suits most use cases. 
INPUT PARAMETERS
    M, N        -   number of rows(M) and columns (N) in a matrix:
                    * M=N (as for now, ALGLIB supports only square SKS)
                    * N>=1
                    * M>=1
    D           -   "bottom" bandwidths, array[M], 0<=D[I]<=I.
                    I-th element stores number of non-zeros at I-th row,
                    below the diagonal (diagonal itself is not included)
    U           -   "top" bandwidths, array[N], 0<=U[I]<=I.
                    I-th element stores number of non-zeros at I-th row,
                    above the diagonal (diagonal itself is not included)

OUTPUT PARAMETERS
    S           -   sparse M*N matrix in SKS representation.
                    All elements are filled by zeros.
                    You may use SparseSet()/SparseAdd() to change their
                    values.

  -- ALGLIB PROJECT --
     Copyright 13.01.2014 by Bochkanov Sergey
*************************************************************************/
    public static void sparsecreatesksbuf(int m, int n, int[] d, int[] u, sparsematrix s)
    {
        // S is passed in by the caller; only its inner object is handed over.
        sparse.sparsecreatesksbuf(m, n, d, u, s.innerobj);
    }

/*************************************************************************
This function copies S0 to S1.
This function completely deallocates memory owned by S1 before creating a
copy of S0. If you want to reuse memory, use SparseCopyBuf.

NOTE: this function does not verify its arguments, it just copies all
      fields of the structure.

  -- ALGLIB PROJECT --
     Copyright 14.10.2011 by Bochkanov Sergey
*************************************************************************/
    public static void sparsecopy(sparsematrix s0, out sparsematrix s1)
    {
        // The destination wrapper is always freshly allocated.
        s1 = new sparsematrix();
        sparse.sparsecopy(s0.innerobj, s1.innerobj);
    }

/*************************************************************************
This function copies S0 to S1.
Memory already allocated in S1 is reused as much as possible.

NOTE: this function does not verify its arguments, it just copies all
      fields of the structure.

-- ALGLIB PROJECT -- Copyright 14.10.2011 by Bochkanov Sergey *************************************************************************/ public static void sparsecopybuf(sparsematrix s0, sparsematrix s1) { sparse.sparsecopybuf(s0.innerobj, s1.innerobj); return; } /************************************************************************* This function efficiently swaps contents of S0 and S1. -- ALGLIB PROJECT -- Copyright 16.01.2014 by Bochkanov Sergey *************************************************************************/ public static void sparseswap(sparsematrix s0, sparsematrix s1) { sparse.sparseswap(s0.innerobj, s1.innerobj); return; } /************************************************************************* This function adds value to S[i,j] - element of the sparse matrix. Matrix must be in a Hash-Table mode. In case S[i,j] already exists in the table, V i added to its value. In case S[i,j] is non-existent, it is inserted in the table. Table automatically grows when necessary. INPUT PARAMETERS S - sparse M*N matrix in Hash-Table representation. Exception will be thrown for CRS matrix. I - row index of the element to modify, 0<=I=i are used, and lower triangle is ignored (it can be empty - these elements are not referenced at all). * if lower triangle is given, only S[i,j] for j<=i are used, and upper triangle is ignored. X - array[N], input vector. For performance reasons we make only quick checks - we check that array size is at least N, but we do not check for NAN's or INF's. Y - output buffer, possibly preallocated. In case buffer size is too small to store result, this buffer is automatically resized. OUTPUT PARAMETERS Y - array[M], S*x NOTE: this function throws exception when called for non-CRS/SKS matrix. You must convert your matrix with SparseConvertToCRS/SKS() before using this function. 
-- ALGLIB PROJECT -- Copyright 14.10.2011 by Bochkanov Sergey *************************************************************************/ public static void sparsesmv(sparsematrix s, bool isupper, double[] x, ref double[] y) { sparse.sparsesmv(s.innerobj, isupper, x, ref y); return; } /************************************************************************* This function calculates vector-matrix-vector product x'*S*x, where S is symmetric matrix. Matrix S must be stored in CRS or SKS format (exception will be thrown otherwise). INPUT PARAMETERS S - sparse M*M matrix in CRS or SKS format. IsUpper - whether upper or lower triangle of S is given: * if upper triangle is given, only S[i,j] for j>=i are used, and lower triangle is ignored (it can be empty - these elements are not referenced at all). * if lower triangle is given, only S[i,j] for j<=i are used, and upper triangle is ignored. X - array[N], input vector. For performance reasons we make only quick checks - we check that array size is at least N, but we do not check for NAN's or INF's. RESULT x'*S*x NOTE: this function throws exception when called for non-CRS/SKS matrix. You must convert your matrix with SparseConvertToCRS/SKS() before using this function. -- ALGLIB PROJECT -- Copyright 27.01.2014 by Bochkanov Sergey *************************************************************************/ public static double sparsevsmv(sparsematrix s, bool isupper, double[] x) { double result = sparse.sparsevsmv(s.innerobj, isupper, x); return result; } /************************************************************************* This function calculates matrix-matrix product S*A. Matrix S must be stored in CRS or SKS format (exception will be thrown otherwise). INPUT PARAMETERS S - sparse M*N matrix in CRS or SKS format. A - array[N][K], input dense matrix. For performance reasons we make only quick checks - we check that array size is at least N, but we do not check for NAN's or INF's. K - number of columns of matrix (A). 
B - output buffer, possibly preallocated. In case buffer size is too small to store result, this buffer is automatically resized. OUTPUT PARAMETERS B - array[M][K], S*A NOTE: this function throws exception when called for non-CRS/SKS matrix. You must convert your matrix with SparseConvertToCRS/SKS() before using this function. -- ALGLIB PROJECT -- Copyright 14.10.2011 by Bochkanov Sergey *************************************************************************/ public static void sparsemm(sparsematrix s, double[,] a, int k, ref double[,] b) { sparse.sparsemm(s.innerobj, a, k, ref b); return; } /************************************************************************* This function calculates matrix-matrix product S^T*A. Matrix S must be stored in CRS or SKS format (exception will be thrown otherwise). INPUT PARAMETERS S - sparse M*N matrix in CRS or SKS format. A - array[M][K], input dense matrix. For performance reasons we make only quick checks - we check that array size is at least M, but we do not check for NAN's or INF's. K - number of columns of matrix (A). B - output buffer, possibly preallocated. In case buffer size is too small to store result, this buffer is automatically resized. OUTPUT PARAMETERS B - array[N][K], S^T*A NOTE: this function throws exception when called for non-CRS/SKS matrix. You must convert your matrix with SparseConvertToCRS/SKS() before using this function. -- ALGLIB PROJECT -- Copyright 14.10.2011 by Bochkanov Sergey *************************************************************************/ public static void sparsemtm(sparsematrix s, double[,] a, int k, ref double[,] b) { sparse.sparsemtm(s.innerobj, a, k, ref b); return; } /************************************************************************* This function simultaneously calculates two matrix-matrix products: S*A and S^T*A. S must be square (non-rectangular) matrix stored in CRS or SKS format (exception will be thrown otherwise). 
INPUT PARAMETERS
    S           -   sparse N*N matrix in CRS or SKS format.
    A           -   array[N][K], input dense matrix. For performance
                    reasons we make only quick checks - we check that
                    array size is at least N, but we do not check for
                    NAN's or INF's.
    K           -   number of columns of matrix (A).
    B0          -   output buffer, possibly preallocated. In case buffer
                    size is too small to store result, this buffer is
                    automatically resized.
    B1          -   output buffer, possibly preallocated. In case buffer
                    size is too small to store result, this buffer is
                    automatically resized.

OUTPUT PARAMETERS
    B0          -   array[N][K], S*A
    B1          -   array[N][K], S^T*A

NOTE: this function throws exception when called for non-CRS/SKS matrix.
You must convert your matrix with SparseConvertToCRS/SKS() before using
this function.

  -- ALGLIB PROJECT --
     Copyright 14.10.2011 by Bochkanov Sergey
*************************************************************************/
    public static void sparsemm2(sparsematrix s, double[,] a, int k, ref double[,] b0, ref double[,] b1)
    {
        // Both output buffers are forwarded by reference to the kernel.
        sparse.sparsemm2(s.innerobj, a, k, ref b0, ref b1);
    }

/*************************************************************************
This function calculates matrix-matrix product S*A, when S is symmetric
matrix. Matrix S must be stored in CRS or SKS format (exception will be
thrown otherwise).

INPUT PARAMETERS
    S           -   sparse M*M matrix in CRS or SKS format.
    IsUpper     -   whether upper or lower triangle of S is given:
                    * if upper triangle is given, only S[i,j] for j>=i
                      are used, and lower triangle is ignored (it can be
                      empty - these elements are not referenced at all).
                    * if lower triangle is given, only S[i,j] for j<=i
                      are used, and upper triangle is ignored.
    A           -   array[N][K], input dense matrix. For performance
                    reasons we make only quick checks - we check that
                    array size is at least N, but we do not check for
                    NAN's or INF's.
    K           -   number of columns of matrix (A).
    B           -   output buffer, possibly preallocated. In case buffer
                    size is too small to store result, this buffer is
                    automatically resized.

OUTPUT PARAMETERS
    B           -   array[M][K], S*A

NOTE: this function throws exception when called for non-CRS/SKS matrix.
You must convert your matrix with SparseConvertToCRS/SKS() before using
this function.

  -- ALGLIB PROJECT --
     Copyright 14.10.2011 by Bochkanov Sergey
*************************************************************************/
    public static void sparsesmm(sparsematrix s, bool isupper, double[,] a, int k, ref double[,] b)
    {
        // B is forwarded by reference so the kernel can replace the buffer.
        sparse.sparsesmm(s.innerobj, isupper, a, k, ref b);
    }

/*************************************************************************
This function calculates matrix-vector product op(S)*x, when x is vector,
S is symmetric triangular matrix, op(S) is transposition or no operation.
Matrix S must be stored in CRS or SKS format (exception will be thrown
otherwise).

INPUT PARAMETERS
    S           -   sparse square matrix in CRS or SKS format.
    IsUpper     -   whether upper or lower triangle of S is used:
                    * if upper triangle is given, only S[i,j] for j>=i
                      are used, and lower triangle is ignored (it can be
                      empty - these elements are not referenced at all).
                    * if lower triangle is given, only S[i,j] for j<=i
                      are used, and upper triangle is ignored.
    IsUnit      -   unit or non-unit diagonal:
                    * if True, diagonal elements of triangular matrix are
                      considered equal to 1.0. Actual elements stored in
                      S are not referenced at all.
                    * if False, diagonal stored in S is used
    OpType      -   operation type:
                    * if 0, S*x is calculated
                    * if 1, (S^T)*x is calculated (transposition)
    X           -   array[N] which stores input vector. For performance
                    reasons we make only quick checks - we check that
                    array size is at least N, but we do not check for
                    NAN's or INF's.
    Y           -   possibly preallocated input buffer. Automatically
                    resized if its size is too small.

OUTPUT PARAMETERS
    Y           -   array[N], op(S)*x

NOTE: this function throws exception when called for non-CRS/SKS matrix.
You must convert your matrix with SparseConvertToCRS/SKS() before using
this function.

-- ALGLIB PROJECT -- Copyright 20.01.2014 by Bochkanov Sergey *************************************************************************/ public static void sparsetrmv(sparsematrix s, bool isupper, bool isunit, int optype, ref double[] x, ref double[] y) { sparse.sparsetrmv(s.innerobj, isupper, isunit, optype, x, ref y); return; } /************************************************************************* This function solves linear system op(S)*y=x where x is vector, S is symmetric triangular matrix, op(S) is transposition or no operation. Matrix S must be stored in CRS or SKS format (exception will be thrown otherwise). INPUT PARAMETERS S - sparse square matrix in CRS or SKS format. IsUpper - whether upper or lower triangle of S is used: * if upper triangle is given, only S[i,j] for j>=i are used, and lower triangle is ignored (it can be empty - these elements are not referenced at all). * if lower triangle is given, only S[i,j] for j<=i are used, and upper triangle is ignored. IsUnit - unit or non-unit diagonal: * if True, diagonal elements of triangular matrix are considered equal to 1.0. Actual elements stored in S are not referenced at all. * if False, diagonal stored in S is used. It is your responsibility to make sure that diagonal is non-zero. OpType - operation type: * if 0, S*x is calculated * if 1, (S^T)*x is calculated (transposition) X - array[N] which stores input vector. For performance reasons we make only quick checks - we check that array size is at least N, but we do not check for NAN's or INF's. OUTPUT PARAMETERS X - array[N], inv(op(S))*x NOTE: this function throws exception when called for non-CRS/SKS matrix. You must convert your matrix with SparseConvertToCRS/SKS() before using this function. NOTE: no assertion or tests are done during algorithm operation. It is your responsibility to provide invertible matrix to algorithm. 
-- ALGLIB PROJECT -- Copyright 20.01.2014 by Bochkanov Sergey *************************************************************************/ public static void sparsetrsv(sparsematrix s, bool isupper, bool isunit, int optype, ref double[] x) { sparse.sparsetrsv(s.innerobj, isupper, isunit, optype, x); return; } /************************************************************************* This procedure resizes Hash-Table matrix. It can be called when you have deleted too many elements from the matrix, and you want to free unneeded memory. -- ALGLIB PROJECT -- Copyright 14.10.2011 by Bochkanov Sergey *************************************************************************/ public static void sparseresizematrix(sparsematrix s) { sparse.sparseresizematrix(s.innerobj); return; } /************************************************************************* This function is used to enumerate all elements of the sparse matrix. Before first call user initializes T0 and T1 counters by zero. These counters are used to remember current position in a matrix; after each call they are updated by the function. Subsequent calls to this function return non-zero elements of the sparse matrix, one by one. If you enumerate CRS matrix, matrix is traversed from left to right, from top to bottom. In case you enumerate matrix stored as Hash table, elements are returned in random order. EXAMPLE > T0=0 > T1=0 > while SparseEnumerate(S,T0,T1,I,J,V) do > ....do something with I,J,V INPUT PARAMETERS S - sparse M*N matrix in Hash-Table or CRS representation. T0 - internal counter T1 - internal counter OUTPUT PARAMETERS T0 - new value of the internal counter T1 - new value of the internal counter I - row index of non-zero element, 0<=I>N * worst case - N>>M, small M, large N, matrix does not fit in CPU cache COMMERCIAL EDITION OF ALGLIB: ! Commercial version of ALGLIB includes two important improvements of ! this function, which can be used from C++ and C#: ! 
* Intel MKL support (lightweight Intel MKL is shipped with ALGLIB) ! * multicore support ! ! Intel MKL gives approximately constant (with respect to number of ! worker threads) acceleration factor which depends on CPU being used, ! problem size and "baseline" ALGLIB edition which is used for ! comparison. ! ! Say, on SSE2-capable CPU with N=1024, HPC ALGLIB will be: ! * about 2-3x faster than ALGLIB for C++ without MKL ! * about 7-10x faster than "pure C#" edition of ALGLIB ! Difference in performance will be more striking on newer CPU's with ! support for newer SIMD instructions. Generally, MKL accelerates any ! problem whose size is at least 128, with best efficiency achieved for ! N's larger than 512. ! ! Commercial edition of ALGLIB also supports multithreaded acceleration ! of this function. We should note that LU decomposition is harder to ! parallelize than, say, matrix-matrix product - this algorithm has ! many internal synchronization points which can not be avoided. However ! parallelism starts to be profitable starting from N=1024, achieving ! near-linear speedup for N=4096 or higher. ! ! In order to use multicore features you have to: ! * use commercial version of ALGLIB ! * call this function with "smp_" prefix, which indicates that ! multicore code will be used (for multicore support) ! ! We recommend you to read 'Working with commercial version' section of ! ALGLIB Reference Manual in order to find out how to use performance- ! related features provided by commercial edition of ALGLIB. INPUT PARAMETERS: A - array[0..M-1, 0..N-1]. M - number of rows in matrix A. N - number of columns in matrix A. OUTPUT PARAMETERS: A - matrices L and U in compact form: * L is stored under main diagonal * U is stored on and above main diagonal Pivots - permutation matrix in compact form. array[0..Min(M-1,N-1)]. 
-- ALGLIB routine -- 10.01.2010 Bochkanov Sergey *************************************************************************/ public static void rmatrixlu(ref double[,] a, int m, int n, out int[] pivots) { pivots = new int[0]; trfac.rmatrixlu(ref a, m, n, ref pivots); return; } public static void smp_rmatrixlu(ref double[,] a, int m, int n, out int[] pivots) { pivots = new int[0]; trfac._pexec_rmatrixlu(ref a, m, n, ref pivots); return; } /************************************************************************* LU decomposition of a general complex matrix with row pivoting A is represented as A = P*L*U, where: * L is lower unitriangular matrix * U is upper triangular matrix * P = P0*P1*...*PK, K=min(M,N)-1, Pi - permutation matrix for I and Pivots[I] This is cache-oblivous implementation of LU decomposition. It is optimized for square matrices. As for rectangular matrices: * best case - M>>N * worst case - N>>M, small M, large N, matrix does not fit in CPU cache COMMERCIAL EDITION OF ALGLIB: ! Commercial version of ALGLIB includes two important improvements of ! this function, which can be used from C++ and C#: ! * Intel MKL support (lightweight Intel MKL is shipped with ALGLIB) ! * multicore support ! ! Intel MKL gives approximately constant (with respect to number of ! worker threads) acceleration factor which depends on CPU being used, ! problem size and "baseline" ALGLIB edition which is used for ! comparison. ! ! Say, on SSE2-capable CPU with N=1024, HPC ALGLIB will be: ! * about 2-3x faster than ALGLIB for C++ without MKL ! * about 7-10x faster than "pure C#" edition of ALGLIB ! Difference in performance will be more striking on newer CPU's with ! support for newer SIMD instructions. Generally, MKL accelerates any ! problem whose size is at least 128, with best efficiency achieved for ! N's larger than 512. ! ! Commercial edition of ALGLIB also supports multithreaded acceleration ! of this function. We should note that LU decomposition is harder to ! 
parallelize than, say, matrix-matrix product - this algorithm has
  ! many internal synchronization points which can not be avoided. However
  ! parallelism starts to be profitable starting from N=1024, achieving
  ! near-linear speedup for N=4096 or higher.
  !
  ! In order to use multicore features you have to:
  ! * use commercial version of ALGLIB
  ! * call this function with "smp_" prefix, which indicates that
  !   multicore code will be used (for multicore support)
  !
  ! We recommend you to read 'Working with commercial version' section of
  ! ALGLIB Reference Manual in order to find out how to use performance-
  ! related features provided by commercial edition of ALGLIB.

INPUT PARAMETERS:
    A       -   array[0..M-1, 0..N-1].
    M       -   number of rows in matrix A.
    N       -   number of columns in matrix A.

OUTPUT PARAMETERS:
    A       -   matrices L and U in compact form:
                * L is stored under main diagonal
                * U is stored on and above main diagonal
    Pivots  -   permutation matrix in compact form.
                array[0..Min(M-1,N-1)].

  -- ALGLIB routine --
     10.01.2010
     Bochkanov Sergey
*************************************************************************/
    public static void cmatrixlu(ref complex[,] a, int m, int n, out int[] pivots)
    {
        // The OUT array starts out empty and is then passed to the kernel
        // by reference.
        pivots = new int[0];
        trfac.cmatrixlu(ref a, m, n, ref pivots);
    }

    public static void smp_cmatrixlu(ref complex[,] a, int m, int n, out int[] pivots)
    {
        // Same as cmatrixlu(), but dispatched through the _pexec_ entry point.
        pivots = new int[0];
        trfac._pexec_cmatrixlu(ref a, m, n, ref pivots);
    }

/*************************************************************************
Cache-oblivious Cholesky decomposition

The algorithm computes Cholesky decomposition of a Hermitian positive-
definite matrix. The result of an algorithm is a representation of A as
A=U'*U or A=L*L' (here X' denotes conj(X^T)).

COMMERCIAL EDITION OF ALGLIB:

  ! Commercial version of ALGLIB includes two important improvements of
  ! this function, which can be used from C++ and C#:
  ! * Intel MKL support (lightweight Intel MKL is shipped with ALGLIB)
  ! * multicore support
  !
  !
Intel MKL gives approximately constant (with respect to number of ! worker threads) acceleration factor which depends on CPU being used, ! problem size and "baseline" ALGLIB edition which is used for ! comparison. ! ! Say, on SSE2-capable CPU with N=1024, HPC ALGLIB will be: ! * about 2-3x faster than ALGLIB for C++ without MKL ! * about 7-10x faster than "pure C#" edition of ALGLIB ! Difference in performance will be more striking on newer CPU's with ! support for newer SIMD instructions. Generally, MKL accelerates any ! problem whose size is at least 128, with best efficiency achieved for ! N's larger than 512. ! ! Commercial edition of ALGLIB also supports multithreaded acceleration ! of this function. We should note that Cholesky decomposition is harder ! to parallelize than, say, matrix-matrix product - this algorithm has ! several synchronization points which can not be avoided. However, ! parallelism starts to be profitable starting from N=500. ! ! In order to use multicore features you have to: ! * use commercial version of ALGLIB ! * call this function with "smp_" prefix, which indicates that ! multicore code will be used (for multicore support) ! ! We recommend you to read 'Working with commercial version' section of ! ALGLIB Reference Manual in order to find out how to use performance- ! related features provided by commercial edition of ALGLIB. INPUT PARAMETERS: A - upper or lower triangle of a factorized matrix. array with elements [0..N-1, 0..N-1]. N - size of matrix A. IsUpper - if IsUpper=True, then A contains an upper triangle of a symmetric matrix, otherwise A contains a lower one. OUTPUT PARAMETERS: A - the result of factorization. If IsUpper=True, then the upper triangle contains matrix U, so that A = U'*U, and the elements below the main diagonal are not modified. Similarly, if IsUpper = False. RESULT: If the matrix is positive-definite, the function returns True. Otherwise, the function returns False. 
Contents of A is not determined in such case.

  -- ALGLIB routine --
     15.12.2009
     Bochkanov Sergey
*************************************************************************/
    public static bool hpdmatrixcholesky(ref complex[,] a, int n, bool isupper)
    {
        return trfac.hpdmatrixcholesky(ref a, n, isupper);
    }

    public static bool smp_hpdmatrixcholesky(ref complex[,] a, int n, bool isupper)
    {
        // Same as hpdmatrixcholesky(), but dispatched through the _pexec_
        // entry point.
        return trfac._pexec_hpdmatrixcholesky(ref a, n, isupper);
    }

/*************************************************************************
Cache-oblivious Cholesky decomposition

The algorithm computes Cholesky decomposition of a symmetric positive-
definite matrix. The result of an algorithm is a representation of A as
A=U^T*U or A=L*L^T

COMMERCIAL EDITION OF ALGLIB:

  ! Commercial version of ALGLIB includes two important improvements of
  ! this function, which can be used from C++ and C#:
  ! * Intel MKL support (lightweight Intel MKL is shipped with ALGLIB)
  ! * multicore support
  !
  ! Intel MKL gives approximately constant (with respect to number of
  ! worker threads) acceleration factor which depends on CPU being used,
  ! problem size and "baseline" ALGLIB edition which is used for
  ! comparison.
  !
  ! Say, on SSE2-capable CPU with N=1024, HPC ALGLIB will be:
  ! * about 2-3x faster than ALGLIB for C++ without MKL
  ! * about 7-10x faster than "pure C#" edition of ALGLIB
  ! Difference in performance will be more striking on newer CPU's with
  ! support for newer SIMD instructions. Generally, MKL accelerates any
  ! problem whose size is at least 128, with best efficiency achieved for
  ! N's larger than 512.
  !
  ! Commercial edition of ALGLIB also supports multithreaded acceleration
  ! of this function. We should note that Cholesky decomposition is harder
  ! to parallelize than, say, matrix-matrix product - this algorithm has
  ! several synchronization points which can not be avoided. However,
  ! parallelism starts to be profitable starting from N=500.
  !
  !
In order to use multicore features you have to:
  ! * use commercial version of ALGLIB
  ! * call this function with "smp_" prefix, which indicates that
  !   multicore code will be used (for multicore support)
  !
  ! We recommend you to read 'Working with commercial version' section of
  ! ALGLIB Reference Manual in order to find out how to use performance-
  ! related features provided by commercial edition of ALGLIB.

INPUT PARAMETERS:
    A       -   upper or lower triangle of a factorized matrix.
                array with elements [0..N-1, 0..N-1].
    N       -   size of matrix A.
    IsUpper -   if IsUpper=True, then A contains an upper triangle of
                a symmetric matrix, otherwise A contains a lower one.

OUTPUT PARAMETERS:
    A       -   the result of factorization. If IsUpper=True, then
                the upper triangle contains matrix U, so that A = U^T*U,
                and the elements below the main diagonal are not modified.
                Similarly, if IsUpper = False.

RESULT:
    If the matrix is positive-definite, the function returns True.
    Otherwise, the function returns False. Contents of A is not determined
    in such case.

  -- ALGLIB routine --
     15.12.2009
     Bochkanov Sergey
*************************************************************************/
    public static bool spdmatrixcholesky(ref double[,] a, int n, bool isupper)
    {
        return trfac.spdmatrixcholesky(ref a, n, isupper);
    }

    public static bool smp_spdmatrixcholesky(ref double[,] a, int n, bool isupper)
    {
        // Same as spdmatrixcholesky(), but dispatched through the _pexec_
        // entry point.
        return trfac._pexec_spdmatrixcholesky(ref a, n, isupper);
    }

/*************************************************************************
Update of Cholesky decomposition: rank-1 update to original A.

This function uses internally allocated buffer which is not saved between
subsequent calls. So, if you perform a lot of subsequent updates, we
recommend you to use "buffered" version of this function:
SPDMatrixCholeskyUpdateAdd1Buf().

INPUT PARAMETERS:
    A       -   upper or lower Cholesky factor.
array with elements [0..N-1, 0..N-1].
                Exception is thrown if array size is too small.
    N       -   size of matrix A, N>0
    IsUpper -   if IsUpper=True, then A contains upper Cholesky factor;
                otherwise A contains a lower one.
    U       -   array[N], rank-1 update to A: A_mod = A + u*u'
                Exception is thrown if array size is too small.

NOTE: this unbuffered version has no BufR argument. Use
      SPDMatrixCholeskyUpdateAdd1Buf() if you want to supply a reusable
      buffer.

OUTPUT PARAMETERS:
    A       -   updated factorization. If IsUpper=True, then the upper
                triangle contains matrix U, and the elements below the
                main diagonal are not modified. Similarly, if
                IsUpper = False.

NOTE: this function always succeeds, so it does not return completion code

NOTE: this function checks sizes of input arrays, but it does NOT checks
      for presence of infinities or NAN's.

  -- ALGLIB --
     03.02.2014
     Sergey Bochkanov
*************************************************************************/
    public static void spdmatrixcholeskyupdateadd1(ref double[,] a, int n, bool isupper, double[] u)
    {
        // A is declared "ref" in this wrapper's signature but is handed to
        // the kernel by value.
        trfac.spdmatrixcholeskyupdateadd1(a, n, isupper, u);
    }

/*************************************************************************
Update of Cholesky decomposition: "fixing" some variables.

This function uses internally allocated buffer which is not saved between
subsequent calls. So, if you perform a lot of subsequent updates, we
recommend you to use "buffered" version of this function:
SPDMatrixCholeskyUpdateFixBuf().

"FIXING" EXPLAINED:

    Suppose we have N*N positive definite matrix A. "Fixing" some variable
    means filling corresponding row/column of A by zeros, and setting
    diagonal element to 1.

    For example, if we fix 2nd variable in 4*4 matrix A, it becomes Af:

        ( A00  A01  A02  A03 )      ( Af00  0   Af02 Af03 )
        ( A10  A11  A12  A13 )      (  0    1    0    0   )
        ( A20  A21  A22  A23 )  =>  ( Af20  0   Af22 Af23 )
        ( A30  A31  A32  A33 )      ( Af30  0   Af32 Af33 )

    If we have Cholesky decomposition of A, it must be recalculated after
    variables were fixed.
However, it is possible to use efficient algorithm, which needs
        O(K*N^2) time to "fix" K variables, given Cholesky decomposition of
        original, "unfixed" A.

    INPUT PARAMETERS:
        A       -   upper or lower Cholesky factor.
                    array with elements [0..N-1, 0..N-1].
                    Exception is thrown if array size is too small.
        N       -   size of matrix A, N>0
        IsUpper -   if IsUpper=True, then A contains upper Cholesky factor;
                    otherwise A contains a lower one.
        Fix     -   array[N], I-th element is True if I-th variable must be
                    fixed. Exception is thrown if array size is too small.

    NOTE: this overload takes no BufR argument; to supply a reusable buffer
          call spdmatrixcholeskyupdatefixbuf() instead.

    OUTPUT PARAMETERS:
        A       -   updated factorization. If IsUpper=True, then the upper
                    triangle contains matrix U, and the elements below the
                    main diagonal are not modified. Similarly, if
                    IsUpper = False.

    NOTE: this function always succeeds, so it does not return completion code

    NOTE: this function checks sizes of input arrays, but it does NOT check
          for presence of infinities or NAN's.

    NOTE: this function is efficient only for moderate amount of updated
          variables - say, 0.1*N or 0.3*N. For larger amount of variables it
          will still work, but you may get better performance with
          straightforward Cholesky.

      -- ALGLIB --
         03.02.2014
         Sergey Bochkanov
    *************************************************************************/
    public static void spdmatrixcholeskyupdatefix(ref double[,] a, int n, bool isupper, bool[] fix)
    {
        // Plain pass-through; note the array is forwarded without "ref" here.
        trfac.spdmatrixcholeskyupdatefix(a, n, isupper, fix);
    }

    /*************************************************************************
    Update of Cholesky decomposition: rank-1 update to original A. "Buffered"
    version which uses preallocated buffer which is saved between subsequent
    function calls.

    See comments for SPDMatrixCholeskyUpdateAdd1() for more information.

    INPUT PARAMETERS:
        A       -   upper or lower Cholesky factor.
                    array with elements [0..N-1, 0..N-1].
Exception is thrown if array size is too small.
        N       -   size of matrix A, N>0
        IsUpper -   if IsUpper=True, then A contains upper Cholesky factor;
                    otherwise A contains a lower one.
        U       -   array[N], rank-1 update to A: A_mod = A + u*u'
                    Exception is thrown if array size is too small.
        BufR    -   possibly preallocated buffer; automatically resized if
                    needed. It is recommended to reuse this buffer if you
                    perform a lot of subsequent decompositions.

    OUTPUT PARAMETERS:
        A       -   updated factorization. If IsUpper=True, then the upper
                    triangle contains matrix U, and the elements below the
                    main diagonal are not modified. Similarly, if
                    IsUpper = False.

      -- ALGLIB --
         03.02.2014
         Sergey Bochkanov
    *************************************************************************/
    public static void spdmatrixcholeskyupdateadd1buf(ref double[,] a, int n, bool isupper, double[] u, ref double[] bufr)
    {
        // Only the work buffer travels by reference into the trfac routine.
        trfac.spdmatrixcholeskyupdateadd1buf(a, n, isupper, u, ref bufr);
    }

    /*************************************************************************
    Update of Cholesky decomposition: "fixing" some variables. "Buffered"
    version which uses preallocated buffer which is saved between subsequent
    function calls.

    See comments for SPDMatrixCholeskyUpdateFix() for more information.

    INPUT PARAMETERS:
        A       -   upper or lower Cholesky factor.
                    array with elements [0..N-1, 0..N-1].
                    Exception is thrown if array size is too small.
        N       -   size of matrix A, N>0
        IsUpper -   if IsUpper=True, then A contains upper Cholesky factor;
                    otherwise A contains a lower one.
        Fix     -   array[N], I-th element is True if I-th variable must be
                    fixed. Exception is thrown if array size is too small.
        BufR    -   possibly preallocated buffer; automatically resized if
                    needed. It is recommended to reuse this buffer if you
                    perform a lot of subsequent decompositions.

    OUTPUT PARAMETERS:
        A       -   updated factorization. If IsUpper=True, then the upper
                    triangle contains matrix U, and the elements below the
                    main diagonal are not modified. Similarly, if
                    IsUpper = False.
-- ALGLIB --
         03.02.2014
         Sergey Bochkanov
    *************************************************************************/
    public static void spdmatrixcholeskyupdatefixbuf(ref double[,] a, int n, bool isupper, bool[] fix, ref double[] bufr)
    {
        // Only the work buffer travels by reference into the trfac routine.
        trfac.spdmatrixcholeskyupdatefixbuf(a, n, isupper, fix, ref bufr);
    }

    /*************************************************************************
    Sparse Cholesky decomposition for skyline matrix using in-place algorithm
    without allocating additional storage.

    The algorithm computes Cholesky decomposition of a symmetric positive-
    definite sparse matrix. The result of an algorithm is a representation of
    A as A=U^T*U or A=L*L^T

    This function is a more efficient alternative to general, but slower
    SparseCholeskyX(), because it does not create temporary copies of the
    target. It performs factorization in-place, which gives best performance
    on low-profile matrices. Its drawback, however, is that it can not perform
    profile-reducing permutation of input matrix.

    INPUT PARAMETERS:
        A       -   sparse matrix in skyline storage (SKS) format.
        N       -   size of matrix A (can be smaller than actual size of A)
        IsUpper -   if IsUpper=True, then factorization is performed on upper
                    triangle. Another triangle is ignored (it may contain some
                    data, but it is not changed).

    OUTPUT PARAMETERS:
        A       -   the result of factorization, stored in SKS. If
                    IsUpper=True, then the upper triangle contains matrix U,
                    such that A = U^T*U. Lower triangle is not changed.
                    Similarly, if IsUpper = False. In this case L is returned,
                    and we have A = L*(L^T).
                    Note that THIS function does not perform permutation of
                    rows to reduce bandwidth.

    RESULT:
        If the matrix is positive-definite, the function returns True.
        Otherwise, the function returns False. Contents of A is not determined
        in such case.

    NOTE: for performance reasons this function does NOT check that input
          matrix includes only finite values. It is your responsibility to
          make sure that there are no infinite or NAN values in the matrix.
-- ALGLIB routine --
         16.01.2014
         Bochkanov Sergey
    *************************************************************************/
    public static bool sparsecholeskyskyline(sparsematrix a, int n, bool isupper)
    {
        // The public sparsematrix wrapper is unwrapped to its inner object
        // before the call is forwarded.
        return trfac.sparsecholeskyskyline(a.innerobj, n, isupper);
    }

}
public partial class alglib
{


    /*************************************************************************
    Reciprocal condition number estimate of a general real matrix (1-norm).

    A lower bound of the condition number is estimated. The function does not
    return that bound itself but its inverse (to avoid an overflow in case of
    a singular matrix).

    Input parameters:
        A   -   matrix. Array whose indexes range within [0..N-1, 0..N-1].
        N   -   size of matrix A.

    Result: 1/LowerBound(cond(A))

    NOTE:
        if k(A) is very large, then matrix is assumed degenerate, k(A)=INF,
        0.0 is returned in such cases.
    *************************************************************************/
    public static double rmatrixrcond1(double[,] a, int n)
    {
        return rcond.rmatrixrcond1(a, n);
    }

    /*************************************************************************
    Estimate of a matrix condition number (infinity-norm).

    The algorithm calculates a lower bound of the condition number. In this
    case, the algorithm does not return a lower bound of the condition number,
    but an inverse number (to avoid an overflow in case of a singular matrix).

    Input parameters:
        A   -   matrix. Array whose indexes range within [0..N-1, 0..N-1].
        N   -   size of matrix A.

    Result: 1/LowerBound(cond(A))

    NOTE:
        if k(A) is very large, then matrix is assumed degenerate, k(A)=INF,
        0.0 is returned in such cases.
*************************************************************************/
    public static double rmatrixrcondinf(double[,] a, int n)
    {
        return rcond.rmatrixrcondinf(a, n);
    }

    /*************************************************************************
    Reciprocal condition number estimate of a symmetric positive definite
    matrix.

    A lower bound of the condition number is estimated. The function does not
    return that bound itself but its inverse (to avoid an overflow in case of
    a singular matrix).

    The 1-norm and inf-norm condition numbers of symmetric matrices are equal,
    so the algorithm does not distinguish between these types of norms.

    Input parameters:
        A       -   symmetric positive definite matrix which is given by its
                    upper or lower triangle depending on the value of
                    IsUpper. Array with elements [0..N-1, 0..N-1].
        N       -   size of matrix A.
        IsUpper -   storage format.

    Result:
        1/LowerBound(cond(A)), if matrix A is positive definite,
       -1, if matrix A is not positive definite, and its condition number
        could not be found by this algorithm.

    NOTE:
        if k(A) is very large, then matrix is assumed degenerate, k(A)=INF,
        0.0 is returned in such cases.
    *************************************************************************/
    public static double spdmatrixrcond(double[,] a, int n, bool isupper)
    {
        return rcond.spdmatrixrcond(a, n, isupper);
    }

    /*************************************************************************
    Triangular matrix: estimate of a condition number (1-norm)

    The algorithm calculates a lower bound of the condition number. In this
    case, the algorithm does not return a lower bound of the condition number,
    but an inverse number (to avoid an overflow in case of a singular matrix).

    Input parameters:
        A       -   matrix. Array[0..N-1, 0..N-1].
        N       -   size of A.
        IsUpper -   True, if the matrix is upper triangular.
        IsUnit  -   True, if the matrix has a unit diagonal.
Result: 1/LowerBound(cond(A))

    NOTE:
        if k(A) is very large, then matrix is assumed degenerate, k(A)=INF,
        0.0 is returned in such cases.
    *************************************************************************/
    public static double rmatrixtrrcond1(double[,] a, int n, bool isupper, bool isunit)
    {
        return rcond.rmatrixtrrcond1(a, n, isupper, isunit);
    }

    /*************************************************************************
    Reciprocal condition number estimate of a real triangular matrix
    (infinity-norm).

    A lower bound of the condition number is estimated. The function does not
    return that bound itself but its inverse (to avoid an overflow in case of
    a singular matrix).

    Input parameters:
        A       -   matrix. Array whose indexes range within
                    [0..N-1, 0..N-1].
        N       -   size of matrix A.
        IsUpper -   True, if the matrix is upper triangular.
        IsUnit  -   True, if the matrix has a unit diagonal.

    Result: 1/LowerBound(cond(A))

    NOTE:
        if k(A) is very large, then matrix is assumed degenerate, k(A)=INF,
        0.0 is returned in such cases.
    *************************************************************************/
    public static double rmatrixtrrcondinf(double[,] a, int n, bool isupper, bool isunit)
    {
        return rcond.rmatrixtrrcondinf(a, n, isupper, isunit);
    }

    /*************************************************************************
    Condition number estimate of a Hermitian positive definite matrix.

    The algorithm calculates a lower bound of the condition number. In this
    case, the algorithm does not return a lower bound of the condition number,
    but an inverse number (to avoid an overflow in case of a singular matrix).

    It should be noted that 1-norm and inf-norm of condition numbers of
    symmetric matrices are equal, so the algorithm doesn't take into account
    the differences between these types of norms.
Input parameters:
        A       -   Hermitian positive definite matrix which is given by its
                    upper or lower triangle depending on the value of
                    IsUpper. Array with elements [0..N-1, 0..N-1].
        N       -   size of matrix A.
        IsUpper -   storage format.

    Result:
        1/LowerBound(cond(A)), if matrix A is positive definite,
       -1, if matrix A is not positive definite, and its condition number
        could not be found by this algorithm.

    NOTE:
        if k(A) is very large, then matrix is assumed degenerate, k(A)=INF,
        0.0 is returned in such cases.
    *************************************************************************/
    public static double hpdmatrixrcond(complex[,] a, int n, bool isupper)
    {
        return rcond.hpdmatrixrcond(a, n, isupper);
    }

    /*************************************************************************
    Reciprocal condition number estimate of a general complex matrix (1-norm).

    A lower bound of the condition number is estimated. The function does not
    return that bound itself but its inverse (to avoid an overflow in case of
    a singular matrix).

    Input parameters:
        A   -   matrix. Array whose indexes range within [0..N-1, 0..N-1].
        N   -   size of matrix A.

    Result: 1/LowerBound(cond(A))

    NOTE:
        if k(A) is very large, then matrix is assumed degenerate, k(A)=INF,
        0.0 is returned in such cases.
    *************************************************************************/
    public static double cmatrixrcond1(complex[,] a, int n)
    {
        return rcond.cmatrixrcond1(a, n);
    }

    /*************************************************************************
    Estimate of a matrix condition number (infinity-norm).

    The algorithm calculates a lower bound of the condition number. In this
    case, the algorithm does not return a lower bound of the condition number,
    but an inverse number (to avoid an overflow in case of a singular matrix).

    Input parameters:
        A   -   matrix. Array whose indexes range within [0..N-1, 0..N-1].
        N   -   size of matrix A.
Result: 1/LowerBound(cond(A))

    NOTE:
        if k(A) is very large, then matrix is assumed degenerate, k(A)=INF,
        0.0 is returned in such cases.
    *************************************************************************/
    public static double cmatrixrcondinf(complex[,] a, int n)
    {
        return rcond.cmatrixrcondinf(a, n);
    }

    /*************************************************************************
    Reciprocal condition number estimate (1-norm) of a real matrix given by
    its LU decomposition.

    A lower bound of the condition number is estimated. The function does not
    return that bound itself but its inverse (to avoid an overflow in case of
    a singular matrix).

    Input parameters:
        LUA         -   LU decomposition of a matrix in compact form. Output
                        of the RMatrixLU subroutine.
        N           -   size of matrix A.

    Result: 1/LowerBound(cond(A))

    NOTE:
        if k(A) is very large, then matrix is assumed degenerate, k(A)=INF,
        0.0 is returned in such cases.
    *************************************************************************/
    public static double rmatrixlurcond1(double[,] lua, int n)
    {
        return rcond.rmatrixlurcond1(lua, n);
    }

    /*************************************************************************
    Estimate of the condition number of a matrix given by its LU decomposition
    (infinity norm).

    The algorithm calculates a lower bound of the condition number. In this
    case, the algorithm does not return a lower bound of the condition number,
    but an inverse number (to avoid an overflow in case of a singular matrix).

    Input parameters:
        LUA     -   LU decomposition of a matrix in compact form. Output of
                    the RMatrixLU subroutine.
        N       -   size of matrix A.

    Result: 1/LowerBound(cond(A))

    NOTE:
        if k(A) is very large, then matrix is assumed degenerate, k(A)=INF,
        0.0 is returned in such cases.
*************************************************************************/
    public static double rmatrixlurcondinf(double[,] lua, int n)
    {
        return rcond.rmatrixlurcondinf(lua, n);
    }

    /*************************************************************************
    Reciprocal condition number estimate of a symmetric positive definite
    matrix given by its Cholesky decomposition.

    A lower bound of the condition number is estimated. The function does not
    return that bound itself but its inverse (to avoid an overflow in case of
    a singular matrix).

    The 1-norm and inf-norm condition numbers of symmetric matrices are equal,
    so the algorithm does not distinguish between these types of norms.

    Input parameters:
        A       -   Cholesky decomposition of matrix A, output of
                    SMatrixCholesky subroutine (earlier revisions of this
                    comment called the parameter CD).
        N       -   size of matrix A.
        IsUpper -   forwarded unchanged to rcond.spdmatrixcholeskyrcond;
                    presumably tells which triangle holds the factor, as in
                    SPDMatrixCholesky - TODO confirm.

    Result: 1/LowerBound(cond(A))

    NOTE:
        if k(A) is very large, then matrix is assumed degenerate, k(A)=INF,
        0.0 is returned in such cases.
    *************************************************************************/
    public static double spdmatrixcholeskyrcond(double[,] a, int n, bool isupper)
    {
        return rcond.spdmatrixcholeskyrcond(a, n, isupper);
    }

    /*************************************************************************
    Condition number estimate of a Hermitian positive definite matrix given by
    Cholesky decomposition.

    The algorithm calculates a lower bound of the condition number. In this
    case, the algorithm does not return a lower bound of the condition number,
    but an inverse number (to avoid an overflow in case of a singular matrix).

    It should be noted that 1-norm and inf-norm condition numbers of symmetric
    matrices are equal, so the algorithm doesn't take into account the
    differences between these types of norms.

    Input parameters:
        CD  - Cholesky decomposition of matrix A,
              output of SMatrixCholesky subroutine.
        N   - size of matrix A.
Result: 1/LowerBound(cond(A))

    NOTE:
        if k(A) is very large, then matrix is assumed degenerate, k(A)=INF,
        0.0 is returned in such cases.
    *************************************************************************/
    public static double hpdmatrixcholeskyrcond(complex[,] a, int n, bool isupper)
    {
        return rcond.hpdmatrixcholeskyrcond(a, n, isupper);
    }

    /*************************************************************************
    Reciprocal condition number estimate (1-norm) of a complex matrix given by
    its LU decomposition.

    A lower bound of the condition number is estimated. The function does not
    return that bound itself but its inverse (to avoid an overflow in case of
    a singular matrix).

    Input parameters:
        LUA         -   LU decomposition of a matrix in compact form. Output
                        of the CMatrixLU subroutine.
        N           -   size of matrix A.

    Result: 1/LowerBound(cond(A))

    NOTE:
        if k(A) is very large, then matrix is assumed degenerate, k(A)=INF,
        0.0 is returned in such cases.
    *************************************************************************/
    public static double cmatrixlurcond1(complex[,] lua, int n)
    {
        return rcond.cmatrixlurcond1(lua, n);
    }

    /*************************************************************************
    Estimate of the condition number of a matrix given by its LU decomposition
    (infinity norm).

    The algorithm calculates a lower bound of the condition number. In this
    case, the algorithm does not return a lower bound of the condition number,
    but an inverse number (to avoid an overflow in case of a singular matrix).

    Input parameters:
        LUA     -   LU decomposition of a matrix in compact form. Output of
                    the CMatrixLU subroutine.
        N       -   size of matrix A.

    Result: 1/LowerBound(cond(A))

    NOTE:
        if k(A) is very large, then matrix is assumed degenerate, k(A)=INF,
        0.0 is returned in such cases.
*************************************************************************/
    public static double cmatrixlurcondinf(complex[,] lua, int n)
    {
        return rcond.cmatrixlurcondinf(lua, n);
    }

    /*************************************************************************
    Reciprocal condition number estimate of a complex triangular matrix
    (1-norm).

    A lower bound of the condition number is estimated. The function does not
    return that bound itself but its inverse (to avoid an overflow in case of
    a singular matrix).

    Input parameters:
        A       -   matrix. Array[0..N-1, 0..N-1].
        N       -   size of A.
        IsUpper -   True, if the matrix is upper triangular.
        IsUnit  -   True, if the matrix has a unit diagonal.

    Result: 1/LowerBound(cond(A))

    NOTE:
        if k(A) is very large, then matrix is assumed degenerate, k(A)=INF,
        0.0 is returned in such cases.
    *************************************************************************/
    public static double cmatrixtrrcond1(complex[,] a, int n, bool isupper, bool isunit)
    {
        return rcond.cmatrixtrrcond1(a, n, isupper, isunit);
    }

    /*************************************************************************
    Triangular matrix: estimate of a matrix condition number (infinity-norm).

    The algorithm calculates a lower bound of the condition number. In this
    case, the algorithm does not return a lower bound of the condition number,
    but an inverse number (to avoid an overflow in case of a singular matrix).

    Input parameters:
        A   -   matrix. Array whose indexes range within [0..N-1, 0..N-1].
        N   -   size of matrix A.
        IsUpper -   True, if the matrix is upper triangular.
        IsUnit  -   True, if the matrix has a unit diagonal.

    Result: 1/LowerBound(cond(A))

    NOTE:
        if k(A) is very large, then matrix is assumed degenerate, k(A)=INF,
        0.0 is returned in such cases.
*************************************************************************/
    public static double cmatrixtrrcondinf(complex[,] a, int n, bool isupper, bool isunit)
    {
        double result = rcond.cmatrixtrrcondinf(a, n, isupper, isunit);
        return result;
    }

}
public partial class alglib
{


    /*************************************************************************
    Matrix inverse report:
    * R1    reciprocal of condition number in 1-norm
    * RInf  reciprocal of condition number in inf-norm

    Public wrapper around a matinv.matinvreport instance; every property
    reads and writes the wrapped object directly.
    *************************************************************************/
    public class matinvreport : alglibobject
    {
        //
        // Public declarations
        //
        // NOTE: each "//" comment must sit on its own physical line. When the
        // declarations that follow share a line with it, they become part of
        // the comment and disappear from the class.
        //

        // Reciprocal of condition number in 1-norm (forwarded to the wrapped report).
        public double r1 { get { return _innerobj.r1; } set { _innerobj.r1 = value; } }

        // Reciprocal of condition number in inf-norm (forwarded to the wrapped report).
        public double rinf { get { return _innerobj.rinf; } set { _innerobj.rinf = value; } }

        // Creates a report backed by a freshly constructed matinv.matinvreport.
        public matinvreport()
        {
            _innerobj = new matinv.matinvreport();
        }

        // Returns a new wrapper around the object produced by the inner
        // report's own make_copy().
        public override alglib.alglibobject make_copy()
        {
            return new matinvreport((matinv.matinvreport)_innerobj.make_copy());
        }

        //
        // Although some of declarations below are public, you should not use them
        // They are intended for internal use only
        //
        private matinv.matinvreport _innerobj;

        // Read-only access to the wrapped report; the wrapper functions pass
        // it to the matinv routines.
        public matinv.matinvreport innerobj { get { return _innerobj; } }

        // Wraps an existing inner report without copying it.
        public matinvreport(matinv.matinvreport obj)
        {
            _innerobj = obj;
        }
    }

    /*************************************************************************
    Inversion of a matrix given by its LU decomposition.

    COMMERCIAL EDITION OF ALGLIB:

      ! Commercial version of ALGLIB includes two important improvements of
      ! this function, which can be used from C++ and C#:
      ! * Intel MKL support (lightweight Intel MKL is shipped with ALGLIB)
      ! * multicore support
      !
      ! Intel MKL gives approximately constant (with respect to number of
      ! worker threads) acceleration factor which depends on CPU being used,
      ! problem size and "baseline" ALGLIB edition which is used for
      ! comparison.
      !
      ! Say, on SSE2-capable CPU with N=1024, HPC ALGLIB will be:
      ! * about 2-3x faster than ALGLIB for C++ without MKL
      !
* about 7-10x faster than "pure C#" edition of ALGLIB ! Difference in performance will be more striking on newer CPU's with ! support for newer SIMD instructions. Generally, MKL accelerates any ! problem whose size is at least 128, with best efficiency achieved for ! N's larger than 512. ! ! Commercial edition of ALGLIB also supports multithreaded acceleration ! of this function. We should note that matrix inversion is harder to ! parallelize than, say, matrix-matrix product - this algorithm has ! many internal synchronization points which can not be avoided. However ! parallelism starts to be profitable starting from N=1024, achieving ! near-linear speedup for N=4096 or higher. ! ! In order to use multicore features you have to: ! * use commercial version of ALGLIB ! * call this function with "smp_" prefix, which indicates that ! multicore code will be used (for multicore support) ! ! We recommend you to read 'Working with commercial version' section of ! ALGLIB Reference Manual in order to find out how to use performance- ! related features provided by commercial edition of ALGLIB. INPUT PARAMETERS: A - LU decomposition of the matrix (output of RMatrixLU subroutine). Pivots - table of permutations (the output of RMatrixLU subroutine). N - size of matrix A (optional) : * if given, only principal NxN submatrix is processed and overwritten. other elements are unchanged. * if not given, size is automatically determined from matrix size (A must be square matrix) OUTPUT PARAMETERS: Info - return code: * -3 A is singular, or VERY close to singular. it is filled by zeros in such cases. * 1 task is solved (but matrix A may be ill-conditioned, check R1/RInf parameters for condition numbers). Rep - solver report, see below for more info A - inverse of matrix A. Array whose indexes range within [0..N-1, 0..N-1]. SOLVER REPORT Subroutine sets following fields of the Rep structure: * R1 reciprocal of condition number: 1/cond(A), 1-norm. 
* RInf      reciprocal of condition number: 1/cond(A), inf-norm.

      -- ALGLIB routine --
         05.02.2010
         Bochkanov Sergey
    *************************************************************************/
    public static void rmatrixluinverse(ref double[,] a, int[] pivots, int n, out int info, out matinvreport rep)
    {
        // Both out-arguments are given initial values first, because the
        // inner routine receives info by "ref" and the report's inner object.
        info = 0;
        rep = new matinvreport();
        matinv.rmatrixluinverse(ref a, pivots, n, ref info, rep.innerobj);
    }


    public static void smp_rmatrixluinverse(ref double[,] a, int[] pivots, int n, out int info, out matinvreport rep)
    {
        // Same as above, but routed to the _pexec_ variant.
        info = 0;
        rep = new matinvreport();
        matinv._pexec_rmatrixluinverse(ref a, pivots, n, ref info, rep.innerobj);
    }

    public static void rmatrixluinverse(ref double[,] a, int[] pivots, out int info, out matinvreport rep)
    {
        // Size-inferring overload: A must be square and Pivots must have one
        // entry per column, otherwise the call is rejected.
        bool sizesAgree = (ap.cols(a)==ap.rows(a)) && (ap.cols(a)==ap.len(pivots));
        if( !sizesAgree )
            throw new alglibexception("Error while calling 'rmatrixluinverse': looks like one of arguments has wrong size");
        info = 0;
        rep = new matinvreport();
        int size = ap.cols(a);
        matinv.rmatrixluinverse(ref a, pivots, size, ref info, rep.innerobj);
    }


    public static void smp_rmatrixluinverse(ref double[,] a, int[] pivots, out int info, out matinvreport rep)
    {
        // Size-inferring overload of the "smp_" entry point; same checks.
        bool sizesAgree = (ap.cols(a)==ap.rows(a)) && (ap.cols(a)==ap.len(pivots));
        if( !sizesAgree )
            throw new alglibexception("Error while calling 'rmatrixluinverse': looks like one of arguments has wrong size");
        info = 0;
        rep = new matinvreport();
        int size = ap.cols(a);
        matinv._pexec_rmatrixluinverse(ref a, pivots, size, ref info, rep.innerobj);
    }

    /*************************************************************************
    Inversion of a general matrix.

    COMMERCIAL EDITION OF ALGLIB:

      ! Commercial version of ALGLIB includes two important improvements of
      ! this function, which can be used from C++ and C#:
      ! * Intel MKL support (lightweight Intel MKL is shipped with ALGLIB)
      ! * multicore support
      !
      ! Intel MKL gives approximately constant (with respect to number of
      ! worker threads) acceleration factor which depends on CPU being used,
      !
problem size and "baseline" ALGLIB edition which is used for ! comparison. ! ! Say, on SSE2-capable CPU with N=1024, HPC ALGLIB will be: ! * about 2-3x faster than ALGLIB for C++ without MKL ! * about 7-10x faster than "pure C#" edition of ALGLIB ! Difference in performance will be more striking on newer CPU's with ! support for newer SIMD instructions. Generally, MKL accelerates any ! problem whose size is at least 128, with best efficiency achieved for ! N's larger than 512. ! ! Commercial edition of ALGLIB also supports multithreaded acceleration ! of this function. We should note that matrix inversion is harder to ! parallelize than, say, matrix-matrix product - this algorithm has ! many internal synchronization points which can not be avoided. However ! parallelism starts to be profitable starting from N=1024, achieving ! near-linear speedup for N=4096 or higher. ! ! In order to use multicore features you have to: ! * use commercial version of ALGLIB ! * call this function with "smp_" prefix, which indicates that ! multicore code will be used (for multicore support) ! ! We recommend you to read 'Working with commercial version' section of ! ALGLIB Reference Manual in order to find out how to use performance- ! related features provided by commercial edition of ALGLIB. Input parameters: A - matrix. N - size of matrix A (optional) : * if given, only principal NxN submatrix is processed and overwritten. other elements are unchanged. * if not given, size is automatically determined from matrix size (A must be square matrix) Output parameters: Info - return code, same as in RMatrixLUInverse Rep - solver report, same as in RMatrixLUInverse A - inverse of matrix A, same as in RMatrixLUInverse Result: none - the function returns no value; a singular matrix is reported through Info (see RMatrixLUInverse).
-- ALGLIB --
         Copyright 2005-2010 by Bochkanov Sergey
    *************************************************************************/
    public static void rmatrixinverse(ref double[,] a, int n, out int info, out matinvreport rep)
    {
        // Both out-arguments are given initial values first, because the
        // inner routine receives info by "ref" and the report's inner object.
        info = 0;
        rep = new matinvreport();
        matinv.rmatrixinverse(ref a, n, ref info, rep.innerobj);
    }


    public static void smp_rmatrixinverse(ref double[,] a, int n, out int info, out matinvreport rep)
    {
        // Same as above, but routed to the _pexec_ variant.
        info = 0;
        rep = new matinvreport();
        matinv._pexec_rmatrixinverse(ref a, n, ref info, rep.innerobj);
    }

    public static void rmatrixinverse(ref double[,] a, out int info, out matinvreport rep)
    {
        // Size-inferring overload: only a square A is accepted.
        bool isSquare = ap.cols(a)==ap.rows(a);
        if( !isSquare )
            throw new alglibexception("Error while calling 'rmatrixinverse': looks like one of arguments has wrong size");
        info = 0;
        rep = new matinvreport();
        int size = ap.cols(a);
        matinv.rmatrixinverse(ref a, size, ref info, rep.innerobj);
    }


    public static void smp_rmatrixinverse(ref double[,] a, out int info, out matinvreport rep)
    {
        // Size-inferring overload of the "smp_" entry point; same check.
        bool isSquare = ap.cols(a)==ap.rows(a);
        if( !isSquare )
            throw new alglibexception("Error while calling 'rmatrixinverse': looks like one of arguments has wrong size");
        info = 0;
        rep = new matinvreport();
        int size = ap.cols(a);
        matinv._pexec_rmatrixinverse(ref a, size, ref info, rep.innerobj);
    }

    /*************************************************************************
    Inversion of a matrix given by its LU decomposition.

    COMMERCIAL EDITION OF ALGLIB:

      ! Commercial version of ALGLIB includes two important improvements of
      ! this function, which can be used from C++ and C#:
      ! * Intel MKL support (lightweight Intel MKL is shipped with ALGLIB)
      ! * multicore support
      !
      ! Intel MKL gives approximately constant (with respect to number of
      ! worker threads) acceleration factor which depends on CPU being used,
      ! problem size and "baseline" ALGLIB edition which is used for
      ! comparison.
      !
      ! Say, on SSE2-capable CPU with N=1024, HPC ALGLIB will be:
      ! * about 2-3x faster than ALGLIB for C++ without MKL
      !
* about 7-10x faster than "pure C#" edition of ALGLIB ! Difference in performance will be more striking on newer CPU's with ! support for newer SIMD instructions. Generally, MKL accelerates any ! problem whose size is at least 128, with best efficiency achieved for ! N's larger than 512. ! ! Commercial edition of ALGLIB also supports multithreaded acceleration ! of this function. We should note that matrix inversion is harder to ! parallelize than, say, matrix-matrix product - this algorithm has ! many internal synchronization points which can not be avoided. However ! parallelism starts to be profitable starting from N=1024, achieving ! near-linear speedup for N=4096 or higher. ! ! In order to use multicore features you have to: ! * use commercial version of ALGLIB ! * call this function with "smp_" prefix, which indicates that ! multicore code will be used (for multicore support) ! ! We recommend you to read 'Working with commercial version' section of ! ALGLIB Reference Manual in order to find out how to use performance- ! related features provided by commercial edition of ALGLIB. INPUT PARAMETERS: A - LU decomposition of the matrix (output of CMatrixLU subroutine). Pivots - table of permutations (the output of CMatrixLU subroutine). N - size of matrix A (optional) : * if given, only principal NxN submatrix is processed and overwritten. other elements are unchanged. 
* if not given, size is automatically determined from matrix size (A must be square matrix) OUTPUT PARAMETERS: Info - return code, same as in RMatrixLUInverse Rep - solver report, same as in RMatrixLUInverse A - inverse of matrix A, same as in RMatrixLUInverse -- ALGLIB routine -- 05.02.2010 Bochkanov Sergey *************************************************************************/
    // Explicit-size overload: resets both outputs and forwards to
    // matinv.cmatrixluinverse.
    public static void cmatrixluinverse(ref complex[,] a, int[] pivots, int n, out int info, out matinvreport rep)
    {
        rep = new matinvreport();
        info = 0;
        matinv.cmatrixluinverse(ref a, pivots, n, ref info, rep.innerobj);
    }

    // Explicit-size overload behind the "smp_" prefix: forwards to
    // matinv._pexec_cmatrixluinverse.
    public static void smp_cmatrixluinverse(ref complex[,] a, int[] pivots, int n, out int info, out matinvreport rep)
    {
        rep = new matinvreport();
        info = 0;
        matinv._pexec_cmatrixluinverse(ref a, pivots, n, ref info, rep.innerobj);
    }

    // Size-inferring overload: 'a' must be square and 'pivots' must have one
    // entry per column; throws alglibexception otherwise.
    public static void cmatrixluinverse(ref complex[,] a, int[] pivots, out int info, out matinvreport rep)
    {
        int size = ap.cols(a);
        if( size!=ap.rows(a) || size!=ap.len(pivots) )
        {
            throw new alglibexception("Error while calling 'cmatrixluinverse': looks like one of arguments has wrong size");
        }
        cmatrixluinverse(ref a, pivots, size, out info, out rep);
    }

    // Size-inferring "smp_" overload; same argument-size checks as above.
    public static void smp_cmatrixluinverse(ref complex[,] a, int[] pivots, out int info, out matinvreport rep)
    {
        int size = ap.cols(a);
        if( size!=ap.rows(a) || size!=ap.len(pivots) )
        {
            throw new alglibexception("Error while calling 'cmatrixluinverse': looks like one of arguments has wrong size");
        }
        smp_cmatrixluinverse(ref a, pivots, size, out info, out rep);
    }

    /************************************************************************* Inversion of a general matrix. COMMERCIAL EDITION OF ALGLIB: ! Commercial version of ALGLIB includes two important improvements of ! this function, which can be used from C++ and C#: !
* Intel MKL support (lightweight Intel MKL is shipped with ALGLIB) ! * multicore support ! ! Intel MKL gives approximately constant (with respect to number of ! worker threads) acceleration factor which depends on CPU being used, ! problem size and "baseline" ALGLIB edition which is used for ! comparison. ! ! Say, on SSE2-capable CPU with N=1024, HPC ALGLIB will be: ! * about 2-3x faster than ALGLIB for C++ without MKL ! * about 7-10x faster than "pure C#" edition of ALGLIB ! Difference in performance will be more striking on newer CPU's with ! support for newer SIMD instructions. Generally, MKL accelerates any ! problem whose size is at least 128, with best efficiency achieved for ! N's larger than 512. ! ! Commercial edition of ALGLIB also supports multithreaded acceleration ! of this function. We should note that matrix inversion is harder to ! parallelize than, say, matrix-matrix product - this algorithm has ! many internal synchronization points which can not be avoided. However ! parallelism starts to be profitable starting from N=1024, achieving ! near-linear speedup for N=4096 or higher. ! ! In order to use multicore features you have to: ! * use commercial version of ALGLIB ! * call this function with "smp_" prefix, which indicates that ! multicore code will be used (for multicore support) ! ! We recommend you to read 'Working with commercial version' section of ! ALGLIB Reference Manual in order to find out how to use performance- ! related features provided by commercial edition of ALGLIB. Input parameters: A - matrix N - size of matrix A (optional) : * if given, only principal NxN submatrix is processed and overwritten. other elements are unchanged. 
* if not given, size is automatically determined from matrix size (A must be square matrix) Output parameters: Info - return code, same as in RMatrixLUInverse Rep - solver report, same as in RMatrixLUInverse A - inverse of matrix A, same as in RMatrixLUInverse -- ALGLIB -- Copyright 2005 by Bochkanov Sergey *************************************************************************/
    // Explicit-size overload: resets both outputs and forwards to
    // matinv.cmatrixinverse.
    public static void cmatrixinverse(ref complex[,] a, int n, out int info, out matinvreport rep)
    {
        rep = new matinvreport();
        info = 0;
        matinv.cmatrixinverse(ref a, n, ref info, rep.innerobj);
    }

    // Explicit-size overload behind the "smp_" prefix: forwards to
    // matinv._pexec_cmatrixinverse.
    public static void smp_cmatrixinverse(ref complex[,] a, int n, out int info, out matinvreport rep)
    {
        rep = new matinvreport();
        info = 0;
        matinv._pexec_cmatrixinverse(ref a, n, ref info, rep.innerobj);
    }

    // Size-inferring overload: 'a' must be square, its column count is used
    // as N. Throws alglibexception when rows and columns differ.
    public static void cmatrixinverse(ref complex[,] a, out int info, out matinvreport rep)
    {
        int size = ap.cols(a);
        if( size!=ap.rows(a) )
        {
            throw new alglibexception("Error while calling 'cmatrixinverse': looks like one of arguments has wrong size");
        }
        cmatrixinverse(ref a, size, out info, out rep);
    }

    // Size-inferring "smp_" overload; same square-matrix check as above.
    public static void smp_cmatrixinverse(ref complex[,] a, out int info, out matinvreport rep)
    {
        int size = ap.cols(a);
        if( size!=ap.rows(a) )
        {
            throw new alglibexception("Error while calling 'cmatrixinverse': looks like one of arguments has wrong size");
        }
        smp_cmatrixinverse(ref a, size, out info, out rep);
    }

    /************************************************************************* Inversion of a symmetric positive definite matrix which is given by Cholesky decomposition. COMMERCIAL EDITION OF ALGLIB: ! Commercial version of ALGLIB includes two important improvements of ! this function, which can be used from C++ and C#: ! * Intel MKL support (lightweight Intel MKL is shipped with ALGLIB) ! * multicore support ! !
Intel MKL gives approximately constant (with respect to number of ! worker threads) acceleration factor which depends on CPU being used, ! problem size and "baseline" ALGLIB edition which is used for ! comparison. ! ! Say, on SSE2-capable CPU with N=1024, HPC ALGLIB will be: ! * about 2-3x faster than ALGLIB for C++ without MKL ! * about 7-10x faster than "pure C#" edition of ALGLIB ! Difference in performance will be more striking on newer CPU's with ! support for newer SIMD instructions. Generally, MKL accelerates any ! problem whose size is at least 128, with best efficiency achieved for ! N's larger than 512. ! ! Commercial edition of ALGLIB also supports multithreaded acceleration ! of this function. However, Cholesky inversion is a "difficult" ! algorithm - it has lots of internal synchronization points which ! prevents efficient parallelization of algorithm. Only very large ! problems (N=thousands) can be efficiently parallelized. ! ! We recommend you to read 'Working with commercial version' section of ! ALGLIB Reference Manual in order to find out how to use performance- ! related features provided by commercial edition of ALGLIB. Input parameters: A - Cholesky decomposition of the matrix to be inverted: A=U'*U or A = L*L'. Output of SPDMatrixCholesky subroutine. N - size of matrix A (optional) : * if given, only principal NxN submatrix is processed and overwritten. other elements are unchanged. * if not given, size is automatically determined from matrix size (A must be square matrix) IsUpper - storage type (optional): * if True, symmetric matrix A is given by its upper triangle, and the lower triangle isn't used/changed by function * if False, symmetric matrix A is given by its lower triangle, and the upper triangle isn't used/changed by function * if not given, lower half is used. 
Output parameters: Info - return code, same as in RMatrixLUInverse Rep - solver report, same as in RMatrixLUInverse A - inverse of matrix A, same as in RMatrixLUInverse -- ALGLIB routine -- 10.02.2010 Bochkanov Sergey *************************************************************************/
    // Explicit overload: resets both outputs and forwards to
    // matinv.spdmatrixcholeskyinverse.
    public static void spdmatrixcholeskyinverse(ref double[,] a, int n, bool isupper, out int info, out matinvreport rep)
    {
        rep = new matinvreport();
        info = 0;
        matinv.spdmatrixcholeskyinverse(ref a, n, isupper, ref info, rep.innerobj);
    }

    // Explicit overload behind the "smp_" prefix: forwards to
    // matinv._pexec_spdmatrixcholeskyinverse.
    public static void smp_spdmatrixcholeskyinverse(ref double[,] a, int n, bool isupper, out int info, out matinvreport rep)
    {
        rep = new matinvreport();
        info = 0;
        matinv._pexec_spdmatrixcholeskyinverse(ref a, n, isupper, ref info, rep.innerobj);
    }

    // Short overload: 'a' must be square; N is its column count and the
    // lower half is used (isupper = false).
    public static void spdmatrixcholeskyinverse(ref double[,] a, out int info, out matinvreport rep)
    {
        int size = ap.cols(a);
        if( size!=ap.rows(a) )
        {
            throw new alglibexception("Error while calling 'spdmatrixcholeskyinverse': looks like one of arguments has wrong size");
        }
        spdmatrixcholeskyinverse(ref a, size, false, out info, out rep);
    }

    // Short "smp_" overload; same check and defaults as above.
    public static void smp_spdmatrixcholeskyinverse(ref double[,] a, out int info, out matinvreport rep)
    {
        int size = ap.cols(a);
        if( size!=ap.rows(a) )
        {
            throw new alglibexception("Error while calling 'spdmatrixcholeskyinverse': looks like one of arguments has wrong size");
        }
        smp_spdmatrixcholeskyinverse(ref a, size, false, out info, out rep);
    }

    /************************************************************************* Inversion of a symmetric positive definite matrix. Given an upper or lower triangle of a symmetric positive definite matrix, the algorithm generates matrix A^-1 and saves the upper or lower triangle depending on the input. COMMERCIAL EDITION OF ALGLIB: !
Commercial version of ALGLIB includes two important improvements of ! this function, which can be used from C++ and C#: ! * Intel MKL support (lightweight Intel MKL is shipped with ALGLIB) ! * multicore support ! ! Intel MKL gives approximately constant (with respect to number of ! worker threads) acceleration factor which depends on CPU being used, ! problem size and "baseline" ALGLIB edition which is used for ! comparison. ! ! Say, on SSE2-capable CPU with N=1024, HPC ALGLIB will be: ! * about 2-3x faster than ALGLIB for C++ without MKL ! * about 7-10x faster than "pure C#" edition of ALGLIB ! Difference in performance will be more striking on newer CPU's with ! support for newer SIMD instructions. Generally, MKL accelerates any ! problem whose size is at least 128, with best efficiency achieved for ! N's larger than 512. ! ! Commercial edition of ALGLIB also supports multithreaded acceleration ! of this function. However, Cholesky inversion is a "difficult" ! algorithm - it has lots of internal synchronization points which ! prevents efficient parallelization of algorithm. Only very large ! problems (N=thousands) can be efficiently parallelized. ! ! We recommend you to read 'Working with commercial version' section of ! ALGLIB Reference Manual in order to find out how to use performance- ! related features provided by commercial edition of ALGLIB. Input parameters: A - matrix to be inverted (upper or lower triangle). Array with elements [0..N-1,0..N-1]. N - size of matrix A (optional) : * if given, only principal NxN submatrix is processed and overwritten. other elements are unchanged. 
* if not given, size is automatically determined from matrix size (A must be square matrix) IsUpper - storage type (optional): * if True, symmetric matrix A is given by its upper triangle, and the lower triangle isn't used/changed by function * if False, symmetric matrix A is given by its lower triangle, and the upper triangle isn't used/changed by function * if not given, both lower and upper triangles must be filled. Output parameters: Info - return code, same as in RMatrixLUInverse Rep - solver report, same as in RMatrixLUInverse A - inverse of matrix A, same as in RMatrixLUInverse -- ALGLIB routine -- 10.02.2010 Bochkanov Sergey *************************************************************************/
    // Explicit overload: resets both outputs and forwards to
    // matinv.spdmatrixinverse.
    public static void spdmatrixinverse(ref double[,] a, int n, bool isupper, out int info, out matinvreport rep)
    {
        rep = new matinvreport();
        info = 0;
        matinv.spdmatrixinverse(ref a, n, isupper, ref info, rep.innerobj);
    }

    // Explicit overload behind the "smp_" prefix: forwards to
    // matinv._pexec_spdmatrixinverse.
    public static void smp_spdmatrixinverse(ref double[,] a, int n, bool isupper, out int info, out matinvreport rep)
    {
        rep = new matinvreport();
        info = 0;
        matinv._pexec_spdmatrixinverse(ref a, n, isupper, ref info, rep.innerobj);
    }

    // Short overload: 'a' must be square and pass ap.issymmetric. The
    // inversion runs with isupper = false, after which ap.forcesymmetric is
    // applied to the result.
    public static void spdmatrixinverse(ref double[,] a, out int info, out matinvreport rep)
    {
        int size = ap.cols(a);
        if( size!=ap.rows(a) )
        {
            throw new alglibexception("Error while calling 'spdmatrixinverse': looks like one of arguments has wrong size");
        }
        if( !ap.issymmetric(a) )
        {
            throw new alglibexception("'a' parameter is not symmetric matrix");
        }
        spdmatrixinverse(ref a, size, false, out info, out rep);
        if( !ap.forcesymmetric(a) )
        {
            throw new alglibexception("Internal error while forcing symmetricity of 'a' parameter");
        }
    }

    // Short "smp_" overload; same checks, defaults and post-processing as above.
    public static void smp_spdmatrixinverse(ref double[,] a, out int info, out matinvreport rep)
    {
        int size = ap.cols(a);
        if( size!=ap.rows(a) )
        {
            throw new alglibexception("Error while calling 'spdmatrixinverse': looks like one of arguments has wrong size");
        }
        if( !ap.issymmetric(a) )
        {
            throw new alglibexception("'a' parameter is not symmetric matrix");
        }
        smp_spdmatrixinverse(ref a, size, false, out info, out rep);
        if( !ap.forcesymmetric(a) )
        {
            throw new alglibexception("Internal error while forcing symmetricity of 'a' parameter");
        }
    }

    /************************************************************************* Inversion of a Hermitian positive definite matrix which is given by Cholesky decomposition. COMMERCIAL EDITION OF ALGLIB: ! Commercial version of ALGLIB includes two important improvements of ! this function, which can be used from C++ and C#: ! * Intel MKL support (lightweight Intel MKL is shipped with ALGLIB) ! * multicore support ! ! Intel MKL gives approximately constant (with respect to number of ! worker threads) acceleration factor which depends on CPU being used, ! problem size and "baseline" ALGLIB edition which is used for ! comparison. ! ! Say, on SSE2-capable CPU with N=1024, HPC ALGLIB will be: ! * about 2-3x faster than ALGLIB for C++ without MKL ! * about 7-10x faster than "pure C#" edition of ALGLIB ! Difference in performance will be more striking on newer CPU's with ! support for newer SIMD instructions. Generally, MKL accelerates any ! problem whose size is at least 128, with best efficiency achieved for ! N's larger than 512. ! ! Commercial edition of ALGLIB also supports multithreaded acceleration ! of this function. However, Cholesky inversion is a "difficult" ! algorithm - it has lots of internal synchronization points which ! prevents efficient parallelization of algorithm. Only very large ! problems (N=thousands) can be efficiently parallelized. ! ! We recommend you to read 'Working with commercial version' section of ! ALGLIB Reference Manual in order to find out how to use performance- !
related features provided by commercial edition of ALGLIB. Input parameters: A - Cholesky decomposition of the matrix to be inverted: A=U'*U or A = L*L'. Output of HPDMatrixCholesky subroutine. N - size of matrix A (optional) : * if given, only principal NxN submatrix is processed and overwritten. other elements are unchanged. * if not given, size is automatically determined from matrix size (A must be square matrix) IsUpper - storage type (optional): * if True, Hermitian matrix A is given by its upper triangle, and the lower triangle isn't used/changed by function * if False, Hermitian matrix A is given by its lower triangle, and the upper triangle isn't used/changed by function * if not given, lower half is used. Output parameters: Info - return code, same as in RMatrixLUInverse Rep - solver report, same as in RMatrixLUInverse A - inverse of matrix A, same as in RMatrixLUInverse -- ALGLIB routine -- 10.02.2010 Bochkanov Sergey *************************************************************************/
    // Explicit overload: resets both outputs and forwards to
    // matinv.hpdmatrixcholeskyinverse.
    public static void hpdmatrixcholeskyinverse(ref complex[,] a, int n, bool isupper, out int info, out matinvreport rep)
    {
        rep = new matinvreport();
        info = 0;
        matinv.hpdmatrixcholeskyinverse(ref a, n, isupper, ref info, rep.innerobj);
    }

    // Explicit overload behind the "smp_" prefix: forwards to
    // matinv._pexec_hpdmatrixcholeskyinverse.
    public static void smp_hpdmatrixcholeskyinverse(ref complex[,] a, int n, bool isupper, out int info, out matinvreport rep)
    {
        rep = new matinvreport();
        info = 0;
        matinv._pexec_hpdmatrixcholeskyinverse(ref a, n, isupper, ref info, rep.innerobj);
    }

    // Short overload: 'a' must be square; N is its column count and the
    // lower half is used (isupper = false).
    public static void hpdmatrixcholeskyinverse(ref complex[,] a, out int info, out matinvreport rep)
    {
        int size = ap.cols(a);
        if( size!=ap.rows(a) )
        {
            throw new alglibexception("Error while calling 'hpdmatrixcholeskyinverse': looks like one of arguments has wrong size");
        }
        hpdmatrixcholeskyinverse(ref a, size, false, out info, out rep);
    }

    // Short "smp_" overload; same check and defaults as above.
    public static void smp_hpdmatrixcholeskyinverse(ref complex[,] a, out int info, out matinvreport rep)
    {
        int size = ap.cols(a);
        if( size!=ap.rows(a) )
        {
            throw new alglibexception("Error while calling 'hpdmatrixcholeskyinverse': looks like one of arguments has wrong size");
        }
        smp_hpdmatrixcholeskyinverse(ref a, size, false, out info, out rep);
    }

    /************************************************************************* Inversion of a Hermitian positive definite matrix. Given an upper or lower triangle of a Hermitian positive definite matrix, the algorithm generates matrix A^-1 and saves the upper or lower triangle depending on the input. COMMERCIAL EDITION OF ALGLIB: ! Commercial version of ALGLIB includes two important improvements of ! this function, which can be used from C++ and C#: ! * Intel MKL support (lightweight Intel MKL is shipped with ALGLIB) ! * multicore support ! ! Intel MKL gives approximately constant (with respect to number of ! worker threads) acceleration factor which depends on CPU being used, ! problem size and "baseline" ALGLIB edition which is used for ! comparison. ! ! Say, on SSE2-capable CPU with N=1024, HPC ALGLIB will be: ! * about 2-3x faster than ALGLIB for C++ without MKL ! * about 7-10x faster than "pure C#" edition of ALGLIB ! Difference in performance will be more striking on newer CPU's with ! support for newer SIMD instructions. Generally, MKL accelerates any ! problem whose size is at least 128, with best efficiency achieved for ! N's larger than 512. ! ! Commercial edition of ALGLIB also supports multithreaded acceleration ! of this function. However, Cholesky inversion is a "difficult" ! algorithm - it has lots of internal synchronization points which ! prevents efficient parallelization of algorithm. Only very large ! problems (N=thousands) can be efficiently parallelized. ! ! We recommend you to read 'Working with commercial version' section of !
ALGLIB Reference Manual in order to find out how to use performance- ! related features provided by commercial edition of ALGLIB. Input parameters: A - matrix to be inverted (upper or lower triangle). Array with elements [0..N-1,0..N-1]. N - size of matrix A (optional) : * if given, only principal NxN submatrix is processed and overwritten. other elements are unchanged. * if not given, size is automatically determined from matrix size (A must be square matrix) IsUpper - storage type (optional): * if True, Hermitian matrix A is given by its upper triangle, and the lower triangle isn't used/changed by function * if False, Hermitian matrix A is given by its lower triangle, and the upper triangle isn't used/changed by function * if not given, both lower and upper triangles must be filled. Output parameters: Info - return code, same as in RMatrixLUInverse Rep - solver report, same as in RMatrixLUInverse A - inverse of matrix A, same as in RMatrixLUInverse -- ALGLIB routine -- 10.02.2010 Bochkanov Sergey *************************************************************************/
    // Explicit overload: resets both outputs and forwards to
    // matinv.hpdmatrixinverse.
    public static void hpdmatrixinverse(ref complex[,] a, int n, bool isupper, out int info, out matinvreport rep)
    {
        rep = new matinvreport();
        info = 0;
        matinv.hpdmatrixinverse(ref a, n, isupper, ref info, rep.innerobj);
    }

    // Explicit overload behind the "smp_" prefix: forwards to
    // matinv._pexec_hpdmatrixinverse.
    public static void smp_hpdmatrixinverse(ref complex[,] a, int n, bool isupper, out int info, out matinvreport rep)
    {
        rep = new matinvreport();
        info = 0;
        matinv._pexec_hpdmatrixinverse(ref a, n, isupper, ref info, rep.innerobj);
    }

    // Short overload: 'a' must be square and pass ap.ishermitian. The
    // inversion runs with isupper = false, after which ap.forcehermitian is
    // applied to the result.
    public static void hpdmatrixinverse(ref complex[,] a, out int info, out matinvreport rep)
    {
        int size = ap.cols(a);
        if( size!=ap.rows(a) )
        {
            throw new alglibexception("Error while calling 'hpdmatrixinverse': looks like one of arguments has wrong size");
        }
        if( !ap.ishermitian(a) )
        {
            throw new alglibexception("'a' parameter is not Hermitian matrix");
        }
        hpdmatrixinverse(ref a, size, false, out info, out rep);
        if( !ap.forcehermitian(a) )
        {
            throw new alglibexception("Internal error while forcing Hermitian properties of 'a' parameter");
        }
    }

    // Short "smp_" overload; same checks, defaults and post-processing as above.
    public static void smp_hpdmatrixinverse(ref complex[,] a, out int info, out matinvreport rep)
    {
        int size = ap.cols(a);
        if( size!=ap.rows(a) )
        {
            throw new alglibexception("Error while calling 'hpdmatrixinverse': looks like one of arguments has wrong size");
        }
        if( !ap.ishermitian(a) )
        {
            throw new alglibexception("'a' parameter is not Hermitian matrix");
        }
        smp_hpdmatrixinverse(ref a, size, false, out info, out rep);
        if( !ap.forcehermitian(a) )
        {
            throw new alglibexception("Internal error while forcing Hermitian properties of 'a' parameter");
        }
    }

    /************************************************************************* Triangular matrix inverse (real) The subroutine inverts the following types of matrices: * upper triangular * upper triangular with unit diagonal * lower triangular * lower triangular with unit diagonal In case of an upper (lower) triangular matrix, the inverse matrix will also be upper (lower) triangular, and after the end of the algorithm, the inverse matrix replaces the source matrix. The elements below (above) the main diagonal are not changed by the algorithm. If the matrix has a unit diagonal, the inverse matrix also has a unit diagonal, and the diagonal elements are not passed to the algorithm. COMMERCIAL EDITION OF ALGLIB: ! Commercial version of ALGLIB includes two important improvements of ! this function, which can be used from C++ and C#: ! * Intel MKL support (lightweight Intel MKL is shipped with ALGLIB) ! * multicore support ! ! Intel MKL gives approximately constant (with respect to number of ! worker threads) acceleration factor which depends on CPU being used, ! problem size and "baseline" ALGLIB edition which is used for !
comparison. ! ! Say, on SSE2-capable CPU with N=1024, HPC ALGLIB will be: ! * about 2-3x faster than ALGLIB for C++ without MKL ! * about 7-10x faster than "pure C#" edition of ALGLIB ! Difference in performance will be more striking on newer CPU's with ! support for newer SIMD instructions. Generally, MKL accelerates any ! problem whose size is at least 128, with best efficiency achieved for ! N's larger than 512. ! ! Commercial edition of ALGLIB also supports multithreaded acceleration ! of this function. We should note that triangular inverse is harder to ! parallelize than, say, matrix-matrix product - this algorithm has ! many internal synchronization points which can not be avoided. However ! parallelism starts to be profitable starting from N=1024, achieving ! near-linear speedup for N=4096 or higher. ! ! In order to use multicore features you have to: ! * use commercial version of ALGLIB ! * call this function with "smp_" prefix, which indicates that ! multicore code will be used (for multicore support) ! ! We recommend you to read 'Working with commercial version' section of ! ALGLIB Reference Manual in order to find out how to use performance- ! related features provided by commercial edition of ALGLIB. Input parameters: A - matrix, array[0..N-1, 0..N-1]. N - size of matrix A (optional) : * if given, only principal NxN submatrix is processed and overwritten. other elements are unchanged. * if not given, size is automatically determined from matrix size (A must be square matrix) IsUpper - True, if the matrix is upper triangular. IsUnit - diagonal type (optional): * if True, matrix has unit diagonal (a[i,i] are NOT used) * if False, matrix diagonal is arbitrary * if not given, False is assumed Output parameters: Info - same as for RMatrixLUInverse Rep - same as for RMatrixLUInverse A - same as for RMatrixLUInverse. 
-- ALGLIB -- Copyright 05.02.2010 by Bochkanov Sergey *************************************************************************/
    // Explicit overload: resets both outputs and forwards to
    // matinv.rmatrixtrinverse.
    public static void rmatrixtrinverse(ref double[,] a, int n, bool isupper, bool isunit, out int info, out matinvreport rep)
    {
        rep = new matinvreport();
        info = 0;
        matinv.rmatrixtrinverse(ref a, n, isupper, isunit, ref info, rep.innerobj);
    }

    // Explicit overload behind the "smp_" prefix: forwards to
    // matinv._pexec_rmatrixtrinverse.
    public static void smp_rmatrixtrinverse(ref double[,] a, int n, bool isupper, bool isunit, out int info, out matinvreport rep)
    {
        rep = new matinvreport();
        info = 0;
        matinv._pexec_rmatrixtrinverse(ref a, n, isupper, isunit, ref info, rep.innerobj);
    }

    // Short overload: 'a' must be square; N is its column count and the
    // diagonal is treated as arbitrary (isunit = false).
    public static void rmatrixtrinverse(ref double[,] a, bool isupper, out int info, out matinvreport rep)
    {
        int size = ap.cols(a);
        if( size!=ap.rows(a) )
        {
            throw new alglibexception("Error while calling 'rmatrixtrinverse': looks like one of arguments has wrong size");
        }
        rmatrixtrinverse(ref a, size, isupper, false, out info, out rep);
    }

    // Short "smp_" overload; same check and defaults as above.
    public static void smp_rmatrixtrinverse(ref double[,] a, bool isupper, out int info, out matinvreport rep)
    {
        int size = ap.cols(a);
        if( size!=ap.rows(a) )
        {
            throw new alglibexception("Error while calling 'rmatrixtrinverse': looks like one of arguments has wrong size");
        }
        smp_rmatrixtrinverse(ref a, size, isupper, false, out info, out rep);
    }

    /************************************************************************* Triangular matrix inverse (complex) The subroutine inverts the following types of matrices: * upper triangular * upper triangular with unit diagonal * lower triangular * lower triangular with unit diagonal In case of an upper (lower) triangular matrix, the inverse matrix will also be upper (lower) triangular, and after the end of the algorithm, the inverse matrix replaces the source matrix.
The elements below (above) the main diagonal are not changed by the algorithm. If the matrix has a unit diagonal, the inverse matrix also has a unit diagonal, and the diagonal elements are not passed to the algorithm. COMMERCIAL EDITION OF ALGLIB: ! Commercial version of ALGLIB includes two important improvements of ! this function, which can be used from C++ and C#: ! * Intel MKL support (lightweight Intel MKL is shipped with ALGLIB) ! * multicore support ! ! Intel MKL gives approximately constant (with respect to number of ! worker threads) acceleration factor which depends on CPU being used, ! problem size and "baseline" ALGLIB edition which is used for ! comparison. ! ! Say, on SSE2-capable CPU with N=1024, HPC ALGLIB will be: ! * about 2-3x faster than ALGLIB for C++ without MKL ! * about 7-10x faster than "pure C#" edition of ALGLIB ! Difference in performance will be more striking on newer CPU's with ! support for newer SIMD instructions. Generally, MKL accelerates any ! problem whose size is at least 128, with best efficiency achieved for ! N's larger than 512. ! ! Commercial edition of ALGLIB also supports multithreaded acceleration ! of this function. We should note that triangular inverse is harder to ! parallelize than, say, matrix-matrix product - this algorithm has ! many internal synchronization points which can not be avoided. However ! parallelism starts to be profitable starting from N=1024, achieving ! near-linear speedup for N=4096 or higher. ! ! In order to use multicore features you have to: ! * use commercial version of ALGLIB ! * call this function with "smp_" prefix, which indicates that ! multicore code will be used (for multicore support) ! ! We recommend you to read 'Working with commercial version' section of ! ALGLIB Reference Manual in order to find out how to use performance- ! related features provided by commercial edition of ALGLIB. Input parameters: A - matrix, array[0..N-1, 0..N-1]. 
N - size of matrix A (optional) : * if given, only principal NxN submatrix is processed and overwritten. other elements are unchanged. * if not given, size is automatically determined from matrix size (A must be square matrix) IsUpper - True, if the matrix is upper triangular. IsUnit - diagonal type (optional): * if True, matrix has unit diagonal (a[i,i] are NOT used) * if False, matrix diagonal is arbitrary * if not given, False is assumed Output parameters: Info - same as for RMatrixLUInverse Rep - same as for RMatrixLUInverse A - same as for RMatrixLUInverse. -- ALGLIB -- Copyright 05.02.2010 by Bochkanov Sergey *************************************************************************/
    // Explicit overload: resets both outputs and forwards to
    // matinv.cmatrixtrinverse.
    public static void cmatrixtrinverse(ref complex[,] a, int n, bool isupper, bool isunit, out int info, out matinvreport rep)
    {
        rep = new matinvreport();
        info = 0;
        matinv.cmatrixtrinverse(ref a, n, isupper, isunit, ref info, rep.innerobj);
    }

    // Explicit overload behind the "smp_" prefix: forwards to
    // matinv._pexec_cmatrixtrinverse.
    public static void smp_cmatrixtrinverse(ref complex[,] a, int n, bool isupper, bool isunit, out int info, out matinvreport rep)
    {
        rep = new matinvreport();
        info = 0;
        matinv._pexec_cmatrixtrinverse(ref a, n, isupper, isunit, ref info, rep.innerobj);
    }

    // Short overload: 'a' must be square; N is its column count and the
    // diagonal is treated as arbitrary (isunit = false).
    public static void cmatrixtrinverse(ref complex[,] a, bool isupper, out int info, out matinvreport rep)
    {
        int size = ap.cols(a);
        if( size!=ap.rows(a) )
        {
            throw new alglibexception("Error while calling 'cmatrixtrinverse': looks like one of arguments has wrong size");
        }
        cmatrixtrinverse(ref a, size, isupper, false, out info, out rep);
    }

    // Short "smp_" overload; same check and defaults as above.
    public static void smp_cmatrixtrinverse(ref complex[,] a, bool isupper, out int info, out matinvreport rep)
    {
        int size = ap.cols(a);
        if( size!=ap.rows(a) )
        {
            throw new alglibexception("Error while calling 'cmatrixtrinverse': looks like one of arguments has wrong size");
        }
        smp_cmatrixtrinverse(ref a, size, isupper, false, out info, out rep);
    }

}
public partial class alglib
{

}
public partial class alglib
{

    /*************************************************************************
    This object stores state of the iterative norm estimation algorithm.

    You should use ALGLIB functions to work with this object.
    *************************************************************************/
    public class normestimatorstate : alglibobject
    {
        //
        // Internal storage. Although the members below that expose it are
        // public, you should not use them; they are intended for internal
        // use only.
        //
        private normestimator.normestimatorstate _innerobj;

        public normestimator.normestimatorstate innerobj
        {
            get { return _innerobj; }
        }

        // Wraps an already existing inner state object.
        public normestimatorstate(normestimator.normestimatorstate obj)
        {
            _innerobj = obj;
        }

        //
        // Public declarations
        //

        // Creates a wrapper around a freshly constructed inner state.
        public normestimatorstate()
            : this(new normestimator.normestimatorstate())
        {
        }

        // Returns a new wrapper built around a copy of the inner state.
        public override alglib.alglibobject make_copy()
        {
            normestimator.normestimatorstate cloned = (normestimator.normestimatorstate)_innerobj.make_copy();
            return new normestimatorstate(cloned);
        }
    }

    /************************************************************************* This procedure initializes matrix norm estimator. USAGE: 1. User initializes algorithm state with NormEstimatorCreate() call 2. User calls NormEstimatorEstimateSparse() (or NormEstimatorIteration()) 3. User calls NormEstimatorResults() to get solution. INPUT PARAMETERS: M - number of rows in the matrix being estimated, M>0 N - number of columns in the matrix being estimated, N>0 NStart - number of random starting vectors recommended value - at least 5. NIts - number of iterations to do with best starting vector recommended value - at least 5. OUTPUT PARAMETERS: State - structure which stores algorithm state NOTE: this algorithm is effectively deterministic, i.e. it always returns same result when repeatedly called for the same matrix.
In fact, algorithm uses randomized starting vectors, but internal random
    numbers generator always generates same sequence of the random values
    (it is a feature, not bug).

    Algorithm can be made non-deterministic with NormEstimatorSetSeed(0) call.

      -- ALGLIB --
         Copyright 06.12.2011 by Bochkanov Sergey
    *************************************************************************/
    public static void normestimatorcreate(int m, int n, int nstart, int nits, out normestimatorstate state)
    {
        // The wrapper is allocated here; the core routine fills its inner state.
        state = new normestimatorstate();
        normestimator.normestimatorcreate(m, n, nstart, nits, state.innerobj);
    }

    /*************************************************************************
    This function changes seed value used by algorithm. In some cases we  need
    deterministic processing, i.e. subsequent calls must return equal results,
    in other cases we need non-deterministic algorithm which returns different
    results for the same matrix on every pass.

    Setting zero seed will lead to non-deterministic algorithm, while non-zero
    value will make our algorithm deterministic.

    INPUT PARAMETERS:
        State       -   norm estimator state, must be initialized with a  call
                        to NormEstimatorCreate()
        SeedVal     -   seed value, >=0. Zero value = non-deterministic algo.

      -- ALGLIB --
         Copyright 06.12.2011 by Bochkanov Sergey
    *************************************************************************/
    public static void normestimatorsetseed(normestimatorstate state, int seedval)
    {
        // Pure pass-through to the computational core.
        normestimator.normestimatorsetseed(state.innerobj, seedval);
    }

    /*************************************************************************
    This function estimates norm of the sparse M*N matrix A.

    INPUT PARAMETERS:
        State       -   norm estimator state, must be initialized with a  call
                        to NormEstimatorCreate()
        A           -   sparse M*N matrix, must be converted to CRS format
                        prior to calling this function.

    After this function  is  over  you can call NormEstimatorResults() to get
    estimate of the norm(A).
-- ALGLIB --
         Copyright 06.12.2011 by Bochkanov Sergey
    *************************************************************************/
    public static void normestimatorestimatesparse(normestimatorstate state, sparsematrix a)
    {
        // Unwrap both objects and delegate to the computational core.
        normestimator.normestimatorestimatesparse(state.innerobj, a.innerobj);
    }

    /*************************************************************************
    Matrix norm estimation results

    INPUT PARAMETERS:
        State   -   algorithm state

    OUTPUT PARAMETERS:
        Nrm     -   estimate of the matrix norm, Nrm>=0

      -- ALGLIB --
         Copyright 06.12.2011 by Bochkanov Sergey
    *************************************************************************/
    public static void normestimatorresults(normestimatorstate state, out double nrm)
    {
        // The out value is zeroed before the core routine overwrites it.
        nrm = 0;
        normestimator.normestimatorresults(state.innerobj, ref nrm);
    }

}
public partial class alglib
{

    /*************************************************************************
    Determinant calculation of the matrix given by its LU decomposition.

    Input parameters:
        A       -   LU decomposition of the matrix (output of
                    RMatrixLU subroutine).
        Pivots  -   table of permutations which were made during
                    the LU decomposition.
                    Output of RMatrixLU subroutine.
        N       -   (optional) size of matrix A:
                    * if given, only principal NxN submatrix is processed and
                      overwritten. other elements are unchanged.
                    * if not given, automatically determined from matrix size
                      (A must be square matrix)

    Result: matrix determinant.
-- ALGLIB --
         Copyright 2005 by Bochkanov Sergey
    *************************************************************************/
    public static double rmatrixludet(double[,] a, int[] pivots, int n)
    {
        return matdet.rmatrixludet(a, pivots, n);
    }

    public static double rmatrixludet(double[,] a, int[] pivots)
    {
        // Short form: A must be square and Pivots must have one entry per row.
        if( !(ap.rows(a)==ap.cols(a) && ap.rows(a)==ap.len(pivots)) )
        {
            throw new alglibexception("Error while calling 'rmatrixludet': looks like one of arguments has wrong size");
        }
        return matdet.rmatrixludet(a, pivots, ap.rows(a));
    }

    /*************************************************************************
    Calculation of the determinant of a general matrix

    Input parameters:
        A       -   matrix, array[0..N-1, 0..N-1]
        N       -   (optional) size of matrix A:
                    * if given, only principal NxN submatrix is processed and
                      overwritten. other elements are unchanged.
                    * if not given, automatically determined from matrix size
                      (A must be square matrix)

    Result: determinant of matrix A.

      -- ALGLIB --
         Copyright 2005 by Bochkanov Sergey
    *************************************************************************/
    public static double rmatrixdet(double[,] a, int n)
    {
        return matdet.rmatrixdet(a, n);
    }

    public static double rmatrixdet(double[,] a)
    {
        // Short form: N is taken from the matrix, which therefore must be square.
        if( ap.rows(a)!=ap.cols(a) )
        {
            throw new alglibexception("Error while calling 'rmatrixdet': looks like one of arguments has wrong size");
        }
        return matdet.rmatrixdet(a, ap.rows(a));
    }

    /*************************************************************************
    Determinant calculation of the matrix given by its LU decomposition.

    Input parameters:
        A       -   LU decomposition of the matrix (output of
                    RMatrixLU subroutine).
        Pivots  -   table of permutations which were made during
                    the LU decomposition.
                    Output of RMatrixLU subroutine.
        N       -   (optional) size of matrix A:
                    * if given, only principal NxN submatrix is processed and
                      overwritten. other elements are unchanged.
* if not given, automatically determined from matrix size
                      (A must be square matrix)

    Result: matrix determinant.

      -- ALGLIB --
         Copyright 2005 by Bochkanov Sergey
    *************************************************************************/
    public static complex cmatrixludet(complex[,] a, int[] pivots, int n)
    {
        return matdet.cmatrixludet(a, pivots, n);
    }

    public static complex cmatrixludet(complex[,] a, int[] pivots)
    {
        // Short form: A must be square and Pivots must have one entry per row.
        if( !(ap.rows(a)==ap.cols(a) && ap.rows(a)==ap.len(pivots)) )
        {
            throw new alglibexception("Error while calling 'cmatrixludet': looks like one of arguments has wrong size");
        }
        return matdet.cmatrixludet(a, pivots, ap.rows(a));
    }

    /*************************************************************************
    Calculation of the determinant of a general matrix

    Input parameters:
        A       -   matrix, array[0..N-1, 0..N-1]
        N       -   (optional) size of matrix A:
                    * if given, only principal NxN submatrix is processed and
                      overwritten. other elements are unchanged.
                    * if not given, automatically determined from matrix size
                      (A must be square matrix)

    Result: determinant of matrix A.

      -- ALGLIB --
         Copyright 2005 by Bochkanov Sergey
    *************************************************************************/
    public static complex cmatrixdet(complex[,] a, int n)
    {
        return matdet.cmatrixdet(a, n);
    }

    public static complex cmatrixdet(complex[,] a)
    {
        // Short form: N is taken from the matrix, which therefore must be square.
        if( ap.rows(a)!=ap.cols(a) )
        {
            throw new alglibexception("Error while calling 'cmatrixdet': looks like one of arguments has wrong size");
        }
        return matdet.cmatrixdet(a, ap.rows(a));
    }

    /*************************************************************************
    Determinant calculation of the matrix given by the Cholesky decomposition.

    Input parameters:
        A       -   Cholesky decomposition,
                    output of SMatrixCholesky subroutine.
        N       -   (optional) size of matrix A:
                    * if given, only principal NxN submatrix is processed and
                      overwritten.
other elements are unchanged.
                    * if not given, automatically determined from matrix size
                      (A must be square matrix)

    As the determinant is equal to the product of squares of diagonal elements,
    it’s not necessary to specify which triangle - lower or upper - the matrix
    is stored in.

    Result:
        matrix determinant.

      -- ALGLIB --
         Copyright 2005-2008 by Bochkanov Sergey
    *************************************************************************/
    public static double spdmatrixcholeskydet(double[,] a, int n)
    {
        return matdet.spdmatrixcholeskydet(a, n);
    }

    public static double spdmatrixcholeskydet(double[,] a)
    {
        // Short form: N is taken from the matrix, which therefore must be square.
        if( ap.rows(a)!=ap.cols(a) )
        {
            throw new alglibexception("Error while calling 'spdmatrixcholeskydet': looks like one of arguments has wrong size");
        }
        return matdet.spdmatrixcholeskydet(a, ap.rows(a));
    }

    /*************************************************************************
    Determinant calculation of the symmetric positive definite matrix.

    Input parameters:
        A       -   matrix. Array with elements [0..N-1, 0..N-1].
        N       -   (optional) size of matrix A:
                    * if given, only principal NxN submatrix is processed and
                      overwritten. other elements are unchanged.
                    * if not given, automatically determined from matrix size
                      (A must be square matrix)
        IsUpper -   (optional) storage type:
                    * if True, symmetric matrix A is given by its upper
                      triangle, and the lower triangle isn’t used/changed by
                      function
                    * if False, symmetric matrix A is given by its lower
                      triangle, and the upper triangle isn’t used/changed by
                      function
                    * if not given, both lower and upper triangles must be
                      filled.

    Result:
        determinant of matrix A.
        If matrix A is not positive definite, exception is thrown.
-- ALGLIB --
         Copyright 2005-2008 by Bochkanov Sergey
    *************************************************************************/
    public static double spdmatrixdet(double[,] a, int n, bool isupper)
    {
        return matdet.spdmatrixdet(a, n, isupper);
    }

    public static double spdmatrixdet(double[,] a)
    {
        // Short form: the full matrix must be supplied, so it has to be square
        // and symmetric; the lower triangle (IsUpper=false) is then passed on.
        if( ap.rows(a)!=ap.cols(a) )
        {
            throw new alglibexception("Error while calling 'spdmatrixdet': looks like one of arguments has wrong size");
        }
        if( !alglib.ap.issymmetric(a) )
        {
            throw new alglibexception("'a' parameter is not symmetric matrix");
        }
        return matdet.spdmatrixdet(a, ap.rows(a), false);
    }

}
public partial class alglib
{

    /*************************************************************************
    Algorithm for solving the following generalized symmetric positive-definite
    eigenproblem:
        A*x = lambda*B*x (1) or
        A*B*x = lambda*x (2) or
        B*A*x = lambda*x (3).
    where A is a symmetric matrix, B - symmetric positive-definite matrix.
    The problem is solved by reducing it to an ordinary  symmetric  eigenvalue
    problem.

    Input parameters:
        A           -   symmetric matrix which is given by its upper or lower
                        triangular part.
                        Array whose indexes range within [0..N-1, 0..N-1].
        N           -   size of matrices A and B.
        IsUpperA    -   storage format of matrix A.
        B           -   symmetric positive-definite matrix which is given by
                        its upper or lower triangular part.
                        Array whose indexes range within [0..N-1, 0..N-1].
        IsUpperB    -   storage format of matrix B.
        ZNeeded     -   if ZNeeded is equal to:
                         * 0, the eigenvectors are not returned;
                         * 1, the eigenvectors are returned.
        ProblemType -   if ProblemType is equal to:
                         * 1, the following problem is solved: A*x = lambda*B*x;
                         * 2, the following problem is solved: A*B*x = lambda*x;
                         * 3, the following problem is solved: B*A*x = lambda*x.

    Output parameters:
        D           -   eigenvalues in ascending order.
                        Array whose index ranges within [0..N-1].
        Z           -   if ZNeeded is equal to:
                         * 0, Z hasn’t changed;
                         * 1, Z contains eigenvectors.
Array whose indexes range within [0..N-1, 0..N-1].
                        The eigenvectors are stored in matrix columns. It should
                        be noted that the eigenvectors in such problems do not
                        form an orthogonal system.

    Result:
        True, if the problem was solved successfully.
        False, if the error occurred during the Cholesky decomposition of matrix
        B (the matrix isn’t positive-definite) or during the work of the iterative
        algorithm for solving the symmetric eigenproblem.

    See also the GeneralizedSymmetricDefiniteEVDReduce subroutine.

      -- ALGLIB --
         Copyright 1.28.2006 by Bochkanov Sergey
    *************************************************************************/
    public static bool smatrixgevd(double[,] a, int n, bool isuppera, double[,] b, bool isupperb, int zneeded, int problemtype, out double[] d, out double[,] z)
    {
        // Out arrays start empty; the core routine receives them by reference.
        d = new double[0];
        z = new double[0,0];
        return spdgevd.smatrixgevd(a, n, isuppera, b, isupperb, zneeded, problemtype, ref d, ref z);
    }

    /*************************************************************************
    Algorithm for reduction of the following generalized symmetric positive-
    definite eigenvalue problem:
        A*x = lambda*B*x (1) or
        A*B*x = lambda*x (2) or
        B*A*x = lambda*x (3)
    to the symmetric eigenvalues problem C*y = lambda*y (eigenvalues of this and
    the given problems are the same, and the eigenvectors of the given problem
    could be obtained by multiplying the obtained eigenvectors by the
    transformation matrix x = R*y).

    Here A is a symmetric matrix, B - symmetric positive-definite matrix.

    Input parameters:
        A           -   symmetric matrix which is given by its upper or lower
                        triangular part.
                        Array whose indexes range within [0..N-1, 0..N-1].
        N           -   size of matrices A and B.
        IsUpperA    -   storage format of matrix A.
        B           -   symmetric positive-definite matrix which is given by
                        its upper or lower triangular part.
                        Array whose indexes range within [0..N-1, 0..N-1].
        IsUpperB    -   storage format of matrix B.
ProblemType -   if ProblemType is equal to:
                         * 1, the following problem is solved: A*x = lambda*B*x;
                         * 2, the following problem is solved: A*B*x = lambda*x;
                         * 3, the following problem is solved: B*A*x = lambda*x.

    Output parameters:
        A           -   symmetric matrix which is given by its upper or lower
                        triangle depending on IsUpperA. Contains matrix C.
                        Array whose indexes range within [0..N-1, 0..N-1].
        R           -   upper triangular or low triangular transformation matrix
                        which is used to obtain the eigenvectors of a given problem
                        as the product of eigenvectors of C (from the right) and
                        matrix R (from the left). If the matrix is upper
                        triangular, the elements below the main diagonal
                        are equal to 0 (and vice versa). Thus, we can perform
                        the multiplication without taking into account the
                        internal structure (which is an easier though less
                        effective way).
                        Array whose indexes range within [0..N-1, 0..N-1].
        IsUpperR    -   type of matrix R (upper or lower triangular).

    Result:
        True, if the problem was reduced successfully.
        False, if the error occurred during the Cholesky decomposition of
            matrix B (the matrix is not positive-definite).

      -- ALGLIB --
         Copyright 1.28.2006 by Bochkanov Sergey
    *************************************************************************/
    public static bool smatrixgevdreduce(ref double[,] a, int n, bool isuppera, double[,] b, bool isupperb, int problemtype, out double[,] r, out bool isupperr)
    {
        // Out values get defaults; the core routine receives them by reference.
        r = new double[0,0];
        isupperr = false;
        return spdgevd.smatrixgevdreduce(ref a, n, isuppera, b, isupperb, problemtype, ref r, ref isupperr);
    }

}
public partial class alglib
{

    /*************************************************************************
    Inverse matrix update by the Sherman-Morrison formula

    The algorithm updates matrix A^-1 when adding a number to an element
    of matrix A.

    Input parameters:
        InvA    -   inverse of matrix A.
                    Array whose indexes range within [0..N-1, 0..N-1].
        N       -   size of matrix A.
        UpdRow  -   row where the element to be updated is stored.
UpdColumn - column where the element to be updated is stored.
        UpdVal  -   a number to be added to the element.

    Output parameters:
        InvA    -   inverse of modified matrix A.

      -- ALGLIB --
         Copyright 2005 by Bochkanov Sergey
    *************************************************************************/
    public static void rmatrixinvupdatesimple(ref double[,] inva, int n, int updrow, int updcolumn, double updval)
    {
        // Pure pass-through to the computational core.
        inverseupdate.rmatrixinvupdatesimple(ref inva, n, updrow, updcolumn, updval);
    }

    /*************************************************************************
    Inverse matrix update by the Sherman-Morrison formula

    The algorithm updates matrix A^-1 when adding a vector to a row
    of matrix A.

    Input parameters:
        InvA    -   inverse of matrix A.
                    Array whose indexes range within [0..N-1, 0..N-1].
        N       -   size of matrix A.
        UpdRow  -   the row of A whose vector V was added.
                    0 <= Row <= N-1
        V       -   the vector to be added to a row.
                    Array whose index ranges within [0..N-1].

    Output parameters:
        InvA    -   inverse of modified matrix A.

      -- ALGLIB --
         Copyright 2005 by Bochkanov Sergey
    *************************************************************************/
    public static void rmatrixinvupdaterow(ref double[,] inva, int n, int updrow, double[] v)
    {
        // Pure pass-through to the computational core.
        inverseupdate.rmatrixinvupdaterow(ref inva, n, updrow, v);
    }

    /*************************************************************************
    Inverse matrix update by the Sherman-Morrison formula

    The algorithm updates matrix A^-1 when adding a vector to a column
    of matrix A.

    Input parameters:
        InvA        -   inverse of matrix A.
                        Array whose indexes range within [0..N-1, 0..N-1].
        N           -   size of matrix A.
        UpdColumn   -   the column of A whose vector U was added.
                        0 <= UpdColumn <= N-1
        U           -   the vector to be added to a column.
                        Array whose index ranges within [0..N-1].

    Output parameters:
        InvA        -   inverse of modified matrix A.
-- ALGLIB --
         Copyright 2005 by Bochkanov Sergey
    *************************************************************************/
    public static void rmatrixinvupdatecolumn(ref double[,] inva, int n, int updcolumn, double[] u)
    {
        // Pure pass-through to the computational core.
        inverseupdate.rmatrixinvupdatecolumn(ref inva, n, updcolumn, u);
    }

    /*************************************************************************
    Inverse matrix update by the Sherman-Morrison formula

    The algorithm computes the inverse of matrix A+u*v’ by using the given matrix
    A^-1 and the vectors u and v.

    Input parameters:
        InvA    -   inverse of matrix A.
                    Array whose indexes range within [0..N-1, 0..N-1].
        N       -   size of matrix A.
        U       -   the vector modifying the matrix.
                    Array whose index ranges within [0..N-1].
        V       -   the vector modifying the matrix.
                    Array whose index ranges within [0..N-1].

    Output parameters:
        InvA - inverse of matrix A + u*v'.

      -- ALGLIB --
         Copyright 2005 by Bochkanov Sergey
    *************************************************************************/
    public static void rmatrixinvupdateuv(ref double[,] inva, int n, double[] u, double[] v)
    {
        // Pure pass-through to the computational core.
        inverseupdate.rmatrixinvupdateuv(ref inva, n, u, v);
    }

}
public partial class alglib
{

    /*************************************************************************
    Subroutine performing the Schur decomposition of a general matrix by using
    the QR algorithm with multiple shifts.

    COMMERCIAL EDITION OF ALGLIB:

      ! Commercial version of ALGLIB includes one  important  improvement   of
      ! this function, which can be used from C++ and C#:
      ! * Intel MKL support (lightweight Intel MKL is shipped with ALGLIB)
      !
      ! Intel MKL gives approximately constant  (with  respect  to  number  of
      ! worker threads) acceleration factor which depends on CPU  being  used,
      ! problem  size  and  "baseline"  ALGLIB  edition  which  is  used   for
      ! comparison.
      !
      ! Multithreaded acceleration is NOT supported for this function.
      !
      ! We recommend you to read 'Working with commercial version' section  of
      !
ALGLIB Reference Manual in order to find out how to use performance- ! related features provided by commercial edition of ALGLIB. The source matrix A is represented as S'*A*S = T, where S is an orthogonal matrix (Schur vectors), T - upper quasi-triangular matrix (with blocks of sizes 1x1 and 2x2 on the main diagonal). Input parameters: A - matrix to be decomposed. Array whose indexes range within [0..N-1, 0..N-1]. N - size of A, N>=0. Output parameters: A - contains matrix T. Array whose indexes range within [0..N-1, 0..N-1]. S - contains Schur vectors. Array whose indexes range within [0..N-1, 0..N-1]. Note 1: The block structure of matrix T can be easily recognized: since all the elements below the blocks are zeros, the elements a[i+1,i] which are equal to 0 show the block border. Note 2: The algorithm performance depends on the value of the internal parameter NS of the InternalSchurDecomposition subroutine which defines the number of shifts in the QR algorithm (similarly to the block width in block-matrix algorithms in linear algebra). If you require maximum performance on your machine, it is recommended to adjust this parameter manually. Result: True, if the algorithm has converged and parameters A and S contain the result. False, if the algorithm has not converged. Algorithm implemented on the basis of the DHSEQR subroutine (LAPACK 3.0 library). 
*************************************************************************/
    public static bool rmatrixschur(ref double[,] a, int n, out double[,] s)
    {
        // S starts empty; the core routine receives it by reference.
        s = new double[0,0];
        return schur.rmatrixschur(ref a, n, ref s);
    }

}
public partial class alglib
{
    public class ablas
    {
        public const int rgemmparallelsize = 64;
        public const int cgemmparallelsize = 64;


        /*************************************************************************
        Splits matrix length in two parts, left part should match ABLAS block size

        INPUT PARAMETERS
            A   -   real matrix, is passed to ensure that we didn't split
                    complex matrix using real splitting subroutine.
                    matrix itself is not changed.
            N   -   length, N>0

        OUTPUT PARAMETERS
            N1  -   length
            N2  -   length

        N1+N2=N, N1>=N2, N2 may be zero

          -- ALGLIB routine --
             15.12.2009
             Bochkanov Sergey
        *************************************************************************/
        public static void ablassplitlength(double[,] a,
            int n,
            ref int n1,
            ref int n2)
        {
            n1 = 0;
            n2 = 0;

            // Lengths above the real block size are split on block boundaries,
            // anything shorter is split on microblock boundaries.
            int blocksize = ablasblocksize(a);
            int granularity = n>blocksize ? blocksize : ablasmicroblocksize();
            ablasinternalsplitlength(n, granularity, ref n1, ref n2);
        }


        /*************************************************************************
        Complex ABLASSplitLength

          -- ALGLIB routine --
             15.12.2009
             Bochkanov Sergey
        *************************************************************************/
        public static void ablascomplexsplitlength(complex[,] a,
            int n,
            ref int n1,
            ref int n2)
        {
            n1 = 0;
            n2 = 0;

            // Same rule as ablassplitlength, using the complex block size.
            int blocksize = ablascomplexblocksize(a);
            int granularity = n>blocksize ? blocksize : ablasmicroblocksize();
            ablasinternalsplitlength(n, granularity, ref n1, ref n2);
        }


        /*************************************************************************
        Returns block size - subdivision size where  cache-oblivious  subroutines
        switch to the optimized kernel.

        INPUT PARAMETERS
            A   -   real matrix, is passed to ensure that we didn't split
                    complex matrix using real splitting subroutine.
matrix itself is not changed.

          -- ALGLIB routine --
             15.12.2009
             Bochkanov Sergey
        *************************************************************************/
        public static int ablasblocksize(double[,] a)
        {
            return 32;
        }


        /*************************************************************************
        Block size for complex subroutines.

          -- ALGLIB routine --
             15.12.2009
             Bochkanov Sergey
        *************************************************************************/
        public static int ablascomplexblocksize(complex[,] a)
        {
            return 24;
        }


        /*************************************************************************
        Microblock size

          -- ALGLIB routine --
             15.12.2009
             Bochkanov Sergey
        *************************************************************************/
        public static int ablasmicroblocksize()
        {
            return 8;
        }


        /*************************************************************************
        Cache-oblivious complex "copy-and-transpose"

        Input parameters:
            M   -   number of rows
            N   -   number of columns
            A   -   source matrix, MxN submatrix is copied and transposed
            IA  -   submatrix offset (row index)
            JA  -   submatrix offset (column index)
            B   -   destination matrix, must be large enough to store result
            IB  -   submatrix offset (row index)
            JB  -   submatrix offset (column index)
        *************************************************************************/
        public static void cmatrixtranspose(int m,
            int n,
            complex[,] a,
            int ia,
            int ja,
            ref complex[,] b,
            int ib,
            int jb)
        {
            int limit = 2*ablascomplexblocksize(a);

            if( m>limit || n>limit )
            {
                //
                // Cache-oblivious recursion: halve the longer dimension
                //
                int head = 0;
                int tail = 0;
                if( m>n )
                {
                    ablascomplexsplitlength(a, m, ref head, ref tail);
                    cmatrixtranspose(head, n, a, ia, ja, ref b, ib, jb);
                    cmatrixtranspose(tail, n, a, ia+head, ja, ref b, ib, jb+head);
                }
                else
                {
                    ablascomplexsplitlength(a, n, ref head, ref tail);
                    cmatrixtranspose(m, head, a, ia, ja, ref b, ib, jb);
                    cmatrixtranspose(m, tail, a, ia, ja+head, ref b, ib+head, jb);
                }
                return;
            }

            //
            // base case: row R of the source becomes column R of the destination
            //
            for(int row=0; row<m; row++)
            {
                for(int col=0; col<n; col++)
                {
                    b[ib+col,jb+row] = a[ia+row,ja+col];
                }
            }
        }


        /*************************************************************************
        Cache-oblivious real "copy-and-transpose"

        Input parameters:
            M   -   number of rows
            N   -   number of columns
            A   -   source matrix, MxN submatrix is copied and transposed
            IA  -   submatrix offset (row index)
            JA  -   submatrix offset (column index)
            B   -   destination matrix, must be large enough to store result
            IB  -   submatrix offset (row index)
            JB  -   submatrix offset (column index)
        *************************************************************************/
        public static void rmatrixtranspose(int m,
            int n,
            double[,] a,
            int ia,
            int ja,
            double[,] b,
            int ib,
            int jb)
        {
            int limit = 2*ablasblocksize(a);

            if( m>limit || n>limit )
            {
                //
                // Cache-oblivious recursion: halve the longer dimension
                //
                int head = 0;
                int tail = 0;
                if( m>n )
                {
                    ablassplitlength(a, m, ref head, ref tail);
                    rmatrixtranspose(head, n, a, ia, ja, b, ib, jb);
                    rmatrixtranspose(tail, n, a, ia+head, ja, b, ib, jb+head);
                }
                else
                {
                    ablassplitlength(a, n, ref head, ref tail);
                    rmatrixtranspose(m, head, a, ia, ja, b, ib, jb);
                    rmatrixtranspose(m, tail, a, ia, ja+head, b, ib+head, jb);
                }
                return;
            }

            //
            // base case: row R of the source becomes column R of the destination
            //
            for(int row=0; row<m; row++)
            {
                for(int col=0; col<n; col++)
                {
                    b[ib+col,jb+row] = a[ia+row,ja+col];
                }
            }
        }


        /*************************************************************************
        This code enforces symmetry of the matrix by copying Upper part to lower
        one (or vice versa).

        INPUT PARAMETERS:
            A   -   matrix
            N   -   number of rows/columns
            IsUpper - whether we want to copy upper triangle to lower one (True)
                    or vice versa (False).
*************************************************************************/
        public static void rmatrixenforcesymmetricity(double[,] a,
            int n,
            bool isupper)
        {
            // Walk every strictly-upper position (row < col) and mirror it.
            for(int row=0; row<n; row++)
            {
                for(int col=row+1; col<n; col++)
                {
                    if( isupper )
                    {
                        // upper triangle is the source
                        a[col,row] = a[row,col];
                    }
                    else
                    {
                        // lower triangle is the source
                        a[row,col] = a[col,row];
                    }
                }
            }
        }


        /*************************************************************************
        Copy

        Input parameters:
            M   -   number of rows
            N   -   number of columns
            A   -   source matrix, MxN submatrix is copied
            IA  -   submatrix offset (row index)
            JA  -   submatrix offset (column index)
            B   -   destination matrix, must be large enough to store result
            IB  -   submatrix offset (row index)
            JB  -   submatrix offset (column index)
        *************************************************************************/
        public static void cmatrixcopy(int m,
            int n,
            complex[,] a,
            int ia,
            int ja,
            ref complex[,] b,
            int ib,
            int jb)
        {
            if( m==0 || n==0 )
            {
                return;
            }
            for(int row=0; row<m; row++)
            {
                for(int col=0; col<n; col++)
                {
                    b[ib+row,jb+col] = a[ia+row,ja+col];
                }
            }
        }


        /*************************************************************************
        Copy

        Input parameters:
            M   -   number of rows
            N   -   number of columns
            A   -   source matrix, MxN submatrix is copied
            IA  -   submatrix offset (row index)
            JA  -   submatrix offset (column index)
            B   -   destination matrix, must be large enough to store result
            IB  -   submatrix offset (row index)
            JB  -   submatrix offset (column index)
        *************************************************************************/
        public static void rmatrixcopy(int m,
            int n,
            double[,] a,
            int ia,
            int ja,
            ref double[,] b,
            int ib,
            int jb)
        {
            if( m==0 || n==0 )
            {
                return;
            }
            for(int row=0; row<m; row++)
            {
                for(int col=0; col<n; col++)
                {
                    b[ib+row,jb+col] = a[ia+row,ja+col];
                }
            }
        }


        /*************************************************************************
        Rank-1 correction: A := A + u*v'

        INPUT PARAMETERS:
            M   -
number of rows
            N   -   number of columns
            A   -   target matrix, MxN submatrix is updated
            IA  -   submatrix offset (row index)
            JA  -   submatrix offset (column index)
            U   -   vector #1
            IU  -   subvector offset
            V   -   vector #2
            IV  -   subvector offset
        *************************************************************************/
        public static void cmatrixrank1(int m,
            int n,
            ref complex[,] a,
            int ia,
            int ja,
            ref complex[] u,
            int iu,
            ref complex[] v,
            int iv)
        {
            if( m==0 || n==0 )
            {
                return;
            }

            // Give the ablasf kernel the first chance; it reports whether it did the work.
            if( ablasf.cmatrixrank1f(m, n, ref a, ia, ja, ref u, iu, ref v, iv) )
            {
                return;
            }

            // Generic fallback: add U[row]*V to each row of the submatrix.
            for(int row=0; row<m; row++)
            {
                complex scale = u[iu+row];
                for(int col=0; col<n; col++)
                {
                    a[ia+row,ja+col] = a[ia+row,ja+col] + scale*v[iv+col];
                }
            }
        }


        /*************************************************************************
        Rank-1 correction: A := A + u*v'

        INPUT PARAMETERS:
            M   -   number of rows
            N   -   number of columns
            A   -   target matrix, MxN submatrix is updated
            IA  -   submatrix offset (row index)
            JA  -   submatrix offset (column index)
            U   -   vector #1
            IU  -   subvector offset
            V   -   vector #2
            IV  -   subvector offset
        *************************************************************************/
        public static void rmatrixrank1(int m,
            int n,
            ref double[,] a,
            int ia,
            int ja,
            ref double[] u,
            int iu,
            ref double[] v,
            int iv)
        {
            if( m==0 || n==0 )
            {
                return;
            }

            // Give the ablasf kernel the first chance; it reports whether it did the work.
            if( ablasf.rmatrixrank1f(m, n, ref a, ia, ja, ref u, iu, ref v, iv) )
            {
                return;
            }

            // Generic fallback: add U[row]*V to each row of the submatrix.
            for(int row=0; row<m; row++)
            {
                double scale = u[iu+row];
                for(int col=0; col<n; col++)
                {
                    a[ia+row,ja+col] = a[ia+row,ja+col] + scale*v[iv+col];
                }
            }
        }


        /*************************************************************************
        Matrix-vector product: y := op(A)*x

        INPUT PARAMETERS:
            M   -   number of rows of op(A)
                    M>=0
            N   -   number of columns of op(A)
                    N>=0
            A   -   target matrix
            IA  -   submatrix offset (row index)
            JA  -   submatrix offset (column index)
            OpA -   operation type:
                    * OpA=0     =>  op(A) = A
                    * OpA=1     =>  op(A) = A^T
                    * OpA=2     =>  op(A) = A^H
            X   -   input vector
            IX  -   subvector offset
            IY  -
subvector offset
            Y   -   preallocated vector, must be large enough to store result

        OUTPUT PARAMETERS:
            Y   -   vector which stores result

        if M=0, then subroutine does nothing.
        if N=0, Y is filled by zeros.


          -- ALGLIB routine --

             28.01.2010
             Bochkanov Sergey
        *************************************************************************/
        public static void cmatrixmv(int m,
            int n,
            complex[,] a,
            int ia,
            int ja,
            int opa,
            complex[] x,
            int ix,
            ref complex[] y,
            int iy)
        {
            if( m==0 )
            {
                return;
            }
            if( n==0 )
            {
                // empty product: result is all zeros
                for(int k=0; k<m; k++)
                {
                    y[iy+k] = 0;
                }
                return;
            }

            // Give the ablasf kernel the first chance; it reports whether it did the work.
            if( ablasf.cmatrixmvf(m, n, a, ia, ja, opa, x, ix, ref y, iy) )
            {
                return;
            }

            switch( opa )
            {
                case 0:
                    //
                    // y = A*x: one dot product per output element
                    //
                    for(int row=0; row<m; row++)
                    {
                        complex dot = 0.0;
                        for(int col=0; col<n; col++)
                        {
                            dot += a[ia+row,ja+col]*x[ix+col];
                        }
                        y[iy+row] = dot;
                    }
                    break;

                case 1:
                    //
                    // y = A^T*x: accumulate scaled rows of A into y
                    //
                    for(int k=0; k<m; k++)
                    {
                        y[iy+k] = 0;
                    }
                    for(int row=0; row<n; row++)
                    {
                        complex weight = x[ix+row];
                        for(int col=0; col<m; col++)
                        {
                            y[iy+col] = y[iy+col] + weight*a[ia+row,ja+col];
                        }
                    }
                    break;

                case 2:
                    //
                    // y = A^H*x: same as A^T, with conjugated elements of A
                    //
                    for(int k=0; k<m; k++)
                    {
                        y[iy+k] = 0;
                    }
                    for(int row=0; row<n; row++)
                    {
                        complex weight = x[ix+row];
                        for(int col=0; col<m; col++)
                        {
                            y[iy+col] = y[iy+col] + weight*math.conj(a[ia+row,ja+col]);
                        }
                    }
                    break;
            }
        }


        /*************************************************************************
        Matrix-vector product: y := op(A)*x

        INPUT PARAMETERS:
            M   -   number of rows of op(A)
            N   -   number of columns of op(A)
            A   -   target matrix
            IA  -   submatrix offset (row index)
            JA  -   submatrix offset (column index)
            OpA -   operation type:
                    * OpA=0     =>  op(A) = A
                    * OpA=1     =>  op(A) = A^T
            X   -   input vector
            IX  -   subvector offset
            IY  -   subvector offset
            Y   -   preallocated vector, must be large enough to store result

        OUTPUT PARAMETERS:
            Y   -   vector which stores result

        if M=0, then subroutine does nothing.
        if N=0, Y is filled by zeros.
-- ALGLIB routine --

             28.01.2010
             Bochkanov Sergey
        *************************************************************************/
        public static void rmatrixmv(int m,
            int n,
            double[,] a,
            int ia,
            int ja,
            int opa,
            double[] x,
            int ix,
            ref double[] y,
            int iy)
        {
            if( m==0 )
            {
                return;
            }
            if( n==0 )
            {
                // empty product: result is all zeros
                for(int k=0; k<m; k++)
                {
                    y[iy+k] = 0;
                }
                return;
            }

            // Give the ablasf kernel the first chance; it reports whether it did the work.
            if( ablasf.rmatrixmvf(m, n, a, ia, ja, opa, x, ix, ref y, iy) )
            {
                return;
            }

            switch( opa )
            {
                case 0:
                    //
                    // y = A*x: one dot product per output element
                    //
                    for(int row=0; row<m; row++)
                    {
                        double dot = 0.0;
                        for(int col=0; col<n; col++)
                        {
                            dot += a[ia+row,ja+col]*x[ix+col];
                        }
                        y[iy+row] = dot;
                    }
                    break;

                case 1:
                    //
                    // y = A^T*x: accumulate scaled rows of A into y
                    //
                    for(int k=0; k<m; k++)
                    {
                        y[iy+k] = 0;
                    }
                    for(int row=0; row<n; row++)
                    {
                        double weight = x[ix+row];
                        for(int col=0; col<m; col++)
                        {
                            y[iy+col] = y[iy+col] + weight*a[ia+row,ja+col];
                        }
                    }
                    break;
            }
        }


        /*************************************************************************
        This subroutine calculates X*op(A^-1) where:
        * X is MxN general matrix
        * A is NxN upper/lower triangular/unitriangular matrix
        * "op" may be identity transformation, transposition, conjugate transposition

        Multiplication result replaces X.
        Cache-oblivious algorithm is used.

        COMMERCIAL EDITION OF ALGLIB:

          ! Commercial version of ALGLIB includes two  important  improvements  of
          ! this function, which can be used from C++ and C#:
          ! * Intel MKL support (lightweight Intel MKL is shipped with ALGLIB)
          ! * multicore support
          !
          ! Intel MKL gives approximately constant  (with  respect  to  number  of
          ! worker threads) acceleration factor which depends on CPU  being  used,
          ! problem  size  and  "baseline"  ALGLIB  edition  which  is  used   for
          ! comparison.
          !
          ! Say, on SSE2-capable CPU with N=1024, HPC ALGLIB will be:
          ! * about 2-3x faster than ALGLIB for C++ without MKL
          ! * about 7-10x faster than "pure C#" edition of ALGLIB
          ! Difference in performance will be more striking  on  newer  CPU's with
          ! support for newer SIMD instructions.
          !
          !
Commercial edition of ALGLIB also supports multithreaded acceleration
  ! of this function. Because starting/stopping worker thread always
  ! involves some overhead, parallelism starts to be profitable for N's
  ! larger than 128.
  !
  ! In order to use multicore features you have to:
  ! * use commercial version of ALGLIB
  ! * call this function with "smp_" prefix, which indicates that
  !   multicore code will be used (for multicore support)
  !
  ! We recommend you to read 'Working with commercial version' section of
  ! ALGLIB Reference Manual in order to find out how to use performance-
  ! related features provided by commercial edition of ALGLIB.

INPUT PARAMETERS
    N       -   matrix size, N>=0
    M       -   matrix size, M>=0
    A       -   matrix, actual matrix is stored in A[I1:I1+N-1,J1:J1+N-1]
    I1      -   submatrix offset
    J1      -   submatrix offset
    IsUpper -   whether matrix is upper triangular
    IsUnit  -   whether matrix is unitriangular
    OpType  -   transformation type:
                * 0 - no transformation
                * 1 - transposition
                * 2 - conjugate transposition
    X       -   matrix, actual matrix is stored in X[I2:I2+M-1,J2:J2+N-1]
    I2      -   submatrix offset
    J2      -   submatrix offset

  -- ALGLIB routine --
     15.12.2009
     Bochkanov Sergey
*************************************************************************/
public static void cmatrixrighttrsm(int m, int n, complex[,] a, int i1, int j1,
    bool isupper, bool isunit, int optype, complex[,] x, int i2, int j2)
{
    int s1 = 0;
    int s2 = 0;
    int bs = ablascomplexblocksize(a);

    // Basecase: the MKL hook is tried first (true means nothing is left
    // to do here), then the non-recursive kernel for small problems.
    if (ablasmkl.cmatrixrighttrsmmkl(m, n, a, i1, j1, isupper, isunit, optype, x, i2, j2))
    {
        return;
    }
    if (m <= bs && n <= bs)
    {
        cmatrixrighttrsm2(m, n, a, i1, j1, isupper, isunit, optype, x, i2, j2);
        return;
    }

    if (m >= n)
    {
        // Split X by rows: both row blocks are solved against the whole A.
        ablascomplexsplitlength(a, m, ref s1, ref s2);
        cmatrixrighttrsm(s1, n, a, i1, j1, isupper, isunit, optype, x, i2, j2);
        cmatrixrighttrsm(s2, n, a, i1, j1, isupper, isunit, optype, x, i2+s1, j2);
        return;
    }

    // Split A into diagonal blocks A1 (s1 x s1) and A2 (s2 x s2) plus one
    // off-diagonal block, and X into column blocks X1, X2. One column
    // block is solved, its contribution is subtracted from the other
    // block via GEMM, and then the other block is solved.
    ablascomplexsplitlength(a, n, ref s1, ref s2);
    if (isupper)
    {
        if (optype == 0)
        {
            // op(A) is upper triangular: X1 first, then X2.
            cmatrixrighttrsm(m, s1, a, i1, j1, isupper, isunit, optype, x, i2, j2);
            cmatrixgemm(m, s2, s1, -1.0, x, i2, j2, 0, a, i1, j1+s1, 0, 1.0, x, i2, j2+s1);
            cmatrixrighttrsm(m, s2, a, i1+s1, j1+s1, isupper, isunit, optype, x, i2, j2+s1);
        }
        else
        {
            // op(A) is lower triangular (transposed upper): X2 first, then X1.
            cmatrixrighttrsm(m, s2, a, i1+s1, j1+s1, isupper, isunit, optype, x, i2, j2+s1);
            cmatrixgemm(m, s1, s2, -1.0, x, i2, j2+s1, 0, a, i1, j1+s1, optype, 1.0, x, i2, j2);
            cmatrixrighttrsm(m, s1, a, i1, j1, isupper, isunit, optype, x, i2, j2);
        }
    }
    else
    {
        if (optype == 0)
        {
            // op(A) is lower triangular: X2 first, then X1.
            cmatrixrighttrsm(m, s2, a, i1+s1, j1+s1, isupper, isunit, optype, x, i2, j2+s1);
            cmatrixgemm(m, s1, s2, -1.0, x, i2, j2+s1, 0, a, i1+s1, j1, 0, 1.0, x, i2, j2);
            cmatrixrighttrsm(m, s1, a, i1, j1, isupper, isunit, optype, x, i2, j2);
        }
        else
        {
            // op(A) is upper triangular (transposed lower): X1 first, then X2.
            cmatrixrighttrsm(m, s1, a, i1, j1, isupper, isunit, optype, x, i2, j2);
            cmatrixgemm(m, s2, s1, -1.0, x, i2, j2, 0, a, i1+s1, j1, optype, 1.0, x, i2, j2+s1);
            cmatrixrighttrsm(m, s2, a, i1+s1, j1+s1, isupper, isunit, optype, x, i2, j2+s1);
        }
    }
}


/*************************************************************************
Single-threaded stub. HPC ALGLIB replaces it by multithreaded code.
*************************************************************************/
public static void _pexec_cmatrixrighttrsm(int m, int n, complex[,] a, int i1, int j1,
    bool isupper, bool isunit, int optype, complex[,] x, int i2, int j2)
{
    // Forwards every argument unchanged to the serial routine.
    cmatrixrighttrsm(m, n, a, i1, j1, isupper, isunit, optype, x, i2, j2);
}


/*************************************************************************
This subroutine calculates op(A^-1)*X where:
* X is MxN general matrix
* A is MxM upper/lower triangular/unitriangular matrix
* "op" may be identity transformation, transposition, conjugate transposition

Multiplication result replaces X.
Cache-oblivious algorithm is used.

COMMERCIAL EDITION OF ALGLIB:

  ! Commercial version of ALGLIB includes two important improvements of
  ! this function, which can be used from C++ and C#:
  ! * Intel MKL support (lightweight Intel MKL is shipped with ALGLIB)
  ! * multicore support
  !
  ! Intel MKL gives approximately constant (with respect to number of
  ! worker threads) acceleration factor which depends on CPU being used,
  ! problem size and "baseline" ALGLIB edition which is used for
  ! comparison.
  !
  ! Say, on SSE2-capable CPU with N=1024, HPC ALGLIB will be:
  ! * about 2-3x faster than ALGLIB for C++ without MKL
  ! * about 7-10x faster than "pure C#" edition of ALGLIB
  ! Difference in performance will be more striking on newer CPU's with
  ! support for newer SIMD instructions.
  !
  ! Commercial edition of ALGLIB also supports multithreaded acceleration
  ! of this function. Because starting/stopping worker thread always
  ! involves some overhead, parallelism starts to be profitable for N's
  ! larger than 128.
  !
  ! In order to use multicore features you have to:
  ! * use commercial version of ALGLIB
  ! * call this function with "smp_" prefix, which indicates that
  !   multicore code will be used (for multicore support)
  !
  ! We recommend you to read 'Working with commercial version' section of
  ! ALGLIB Reference Manual in order to find out how to use performance-
  !
related features provided by commercial edition of ALGLIB.

INPUT PARAMETERS
    N       -   matrix size, N>=0
    M       -   matrix size, M>=0
    A       -   matrix, actual matrix is stored in A[I1:I1+M-1,J1:J1+M-1]
    I1      -   submatrix offset
    J1      -   submatrix offset
    IsUpper -   whether matrix is upper triangular
    IsUnit  -   whether matrix is unitriangular
    OpType  -   transformation type:
                * 0 - no transformation
                * 1 - transposition
                * 2 - conjugate transposition
    X       -   matrix, actual matrix is stored in X[I2:I2+M-1,J2:J2+N-1]
    I2      -   submatrix offset
    J2      -   submatrix offset

  -- ALGLIB routine --
     15.12.2009
     Bochkanov Sergey
*************************************************************************/
public static void cmatrixlefttrsm(int m, int n, complex[,] a, int i1, int j1,
    bool isupper, bool isunit, int optype, complex[,] x, int i2, int j2)
{
    int s1 = 0;
    int s2 = 0;
    int bs = ablascomplexblocksize(a);

    // Basecase: the MKL hook is tried first (true means nothing is left
    // to do here), then the non-recursive kernel for small problems.
    if (ablasmkl.cmatrixlefttrsmmkl(m, n, a, i1, j1, isupper, isunit, optype, x, i2, j2))
    {
        return;
    }
    if (m <= bs && n <= bs)
    {
        cmatrixlefttrsm2(m, n, a, i1, j1, isupper, isunit, optype, x, i2, j2);
        return;
    }

    if (n >= m)
    {
        // Split X by columns: both column blocks are solved against the whole A.
        ablascomplexsplitlength(x, n, ref s1, ref s2);
        cmatrixlefttrsm(m, s1, a, i1, j1, isupper, isunit, optype, x, i2, j2);
        cmatrixlefttrsm(m, s2, a, i1, j1, isupper, isunit, optype, x, i2, j2+s1);
        return;
    }

    // Split A into diagonal blocks A1 (s1 x s1) and A2 (s2 x s2) plus one
    // off-diagonal block, and X into row blocks X1, X2. One row block is
    // solved, its contribution is subtracted from the other block via
    // GEMM, and then the other block is solved.
    ablascomplexsplitlength(a, m, ref s1, ref s2);
    if (isupper)
    {
        if (optype == 0)
        {
            // op(A) is upper triangular: X2 first, then X1.
            cmatrixlefttrsm(s2, n, a, i1+s1, j1+s1, isupper, isunit, optype, x, i2+s1, j2);
            cmatrixgemm(s1, n, s2, -1.0, a, i1, j1+s1, 0, x, i2+s1, j2, 0, 1.0, x, i2, j2);
            cmatrixlefttrsm(s1, n, a, i1, j1, isupper, isunit, optype, x, i2, j2);
        }
        else
        {
            // op(A) is lower triangular (transposed upper): X1 first, then X2.
            cmatrixlefttrsm(s1, n, a, i1, j1, isupper, isunit, optype, x, i2, j2);
            cmatrixgemm(s2, n, s1, -1.0, a, i1, j1+s1, optype, x, i2, j2, 0, 1.0, x, i2+s1, j2);
            cmatrixlefttrsm(s2, n, a, i1+s1, j1+s1, isupper, isunit, optype, x, i2+s1, j2);
        }
    }
    else
    {
        if (optype == 0)
        {
            // op(A) is lower triangular: X1 first, then X2.
            cmatrixlefttrsm(s1, n, a, i1, j1, isupper, isunit, optype, x, i2, j2);
            cmatrixgemm(s2, n, s1, -1.0, a, i1+s1, j1, 0, x, i2, j2, 0, 1.0, x, i2+s1, j2);
            cmatrixlefttrsm(s2, n, a, i1+s1, j1+s1, isupper, isunit, optype, x, i2+s1, j2);
        }
        else
        {
            // op(A) is upper triangular (transposed lower): X2 first, then X1.
            cmatrixlefttrsm(s2, n, a, i1+s1, j1+s1, isupper, isunit, optype, x, i2+s1, j2);
            cmatrixgemm(s1, n, s2, -1.0, a, i1+s1, j1, optype, x, i2+s1, j2, 0, 1.0, x, i2, j2);
            cmatrixlefttrsm(s1, n, a, i1, j1, isupper, isunit, optype, x, i2, j2);
        }
    }
}


/*************************************************************************
Single-threaded stub. HPC ALGLIB replaces it by multithreaded code.
*************************************************************************/
public static void _pexec_cmatrixlefttrsm(int m, int n, complex[,] a, int i1, int j1,
    bool isupper, bool isunit, int optype, complex[,] x, int i2, int j2)
{
    // Forwards every argument unchanged to the serial routine.
    cmatrixlefttrsm(m, n, a, i1, j1, isupper, isunit, optype, x, i2, j2);
}


/*************************************************************************
This subroutine calculates X*op(A^-1) where:
* X is MxN general matrix
* A is NxN upper/lower triangular/unitriangular matrix
* "op" may be identity transformation, transposition

Multiplication result replaces X.
Cache-oblivious algorithm is used.

COMMERCIAL EDITION OF ALGLIB:

  ! Commercial version of ALGLIB includes two important improvements of
  ! this function, which can be used from C++ and C#:
  ! * Intel MKL support (lightweight Intel MKL is shipped with ALGLIB)
  ! * multicore support
  !
  ! Intel MKL gives approximately constant (with respect to number of
  !
worker threads) acceleration factor which depends on CPU being used, ! problem size and "baseline" ALGLIB edition which is used for ! comparison. ! ! Say, on SSE2-capable CPU with N=1024, HPC ALGLIB will be: ! * about 2-3x faster than ALGLIB for C++ without MKL ! * about 7-10x faster than "pure C#" edition of ALGLIB ! Difference in performance will be more striking on newer CPU's with ! support for newer SIMD instructions. ! ! Commercial edition of ALGLIB also supports multithreaded acceleration ! of this function. Because starting/stopping worker thread always ! involves some overhead, parallelism starts to be profitable for N's ! larger than 128. ! ! In order to use multicore features you have to: ! * use commercial version of ALGLIB ! * call this function with "smp_" prefix, which indicates that ! multicore code will be used (for multicore support) ! ! We recommend you to read 'Working with commercial version' section of ! ALGLIB Reference Manual in order to find out how to use performance- ! related features provided by commercial edition of ALGLIB. 
INPUT PARAMETERS
    N       -   matrix size, N>=0
    M       -   matrix size, M>=0
    A       -   matrix, actual matrix is stored in A[I1:I1+N-1,J1:J1+N-1]
    I1      -   submatrix offset
    J1      -   submatrix offset
    IsUpper -   whether matrix is upper triangular
    IsUnit  -   whether matrix is unitriangular
    OpType  -   transformation type:
                * 0 - no transformation
                * 1 - transposition
    X       -   matrix, actual matrix is stored in X[I2:I2+M-1,J2:J2+N-1]
    I2      -   submatrix offset
    J2      -   submatrix offset

  -- ALGLIB routine --
     15.12.2009
     Bochkanov Sergey
*************************************************************************/
public static void rmatrixrighttrsm(int m, int n, double[,] a, int i1, int j1,
    bool isupper, bool isunit, int optype, double[,] x, int i2, int j2)
{
    int s1 = 0;
    int s2 = 0;
    int bs = ablasblocksize(a);

    // Basecase: the MKL hook is tried first (true means nothing is left
    // to do here), then the non-recursive kernel for small problems.
    if (ablasmkl.rmatrixrighttrsmmkl(m, n, a, i1, j1, isupper, isunit, optype, x, i2, j2))
    {
        return;
    }
    if (m <= bs && n <= bs)
    {
        rmatrixrighttrsm2(m, n, a, i1, j1, isupper, isunit, optype, x, i2, j2);
        return;
    }

    if (m >= n)
    {
        // Split X by rows: both row blocks are solved against the whole A.
        ablassplitlength(a, m, ref s1, ref s2);
        rmatrixrighttrsm(s1, n, a, i1, j1, isupper, isunit, optype, x, i2, j2);
        rmatrixrighttrsm(s2, n, a, i1, j1, isupper, isunit, optype, x, i2+s1, j2);
        return;
    }

    // Split A into diagonal blocks A1 (s1 x s1) and A2 (s2 x s2) plus one
    // off-diagonal block, and X into column blocks X1, X2. One column
    // block is solved, its contribution is subtracted from the other
    // block via GEMM, and then the other block is solved.
    ablassplitlength(a, n, ref s1, ref s2);
    if (isupper)
    {
        if (optype == 0)
        {
            // op(A) is upper triangular: X1 first, then X2.
            rmatrixrighttrsm(m, s1, a, i1, j1, isupper, isunit, optype, x, i2, j2);
            rmatrixgemm(m, s2, s1, -1.0, x, i2, j2, 0, a, i1, j1+s1, 0, 1.0, x, i2, j2+s1);
            rmatrixrighttrsm(m, s2, a, i1+s1, j1+s1, isupper, isunit, optype, x, i2, j2+s1);
        }
        else
        {
            // op(A) is lower triangular (transposed upper): X2 first, then X1.
            rmatrixrighttrsm(m, s2, a, i1+s1, j1+s1, isupper, isunit, optype, x, i2, j2+s1);
            rmatrixgemm(m, s1, s2, -1.0, x, i2, j2+s1, 0, a, i1, j1+s1, optype, 1.0, x, i2, j2);
            rmatrixrighttrsm(m, s1, a, i1, j1, isupper, isunit, optype, x, i2, j2);
        }
    }
    else
    {
        if (optype == 0)
        {
            // op(A) is lower triangular: X2 first, then X1.
            rmatrixrighttrsm(m, s2, a, i1+s1, j1+s1, isupper, isunit, optype, x, i2, j2+s1);
            rmatrixgemm(m, s1, s2, -1.0, x, i2, j2+s1, 0, a, i1+s1, j1, 0, 1.0, x, i2, j2);
            rmatrixrighttrsm(m, s1, a, i1, j1, isupper, isunit, optype, x, i2, j2);
        }
        else
        {
            // op(A) is upper triangular (transposed lower): X1 first, then X2.
            rmatrixrighttrsm(m, s1, a, i1, j1, isupper, isunit, optype, x, i2, j2);
            rmatrixgemm(m, s2, s1, -1.0, x, i2, j2, 0, a, i1+s1, j1, optype, 1.0, x, i2, j2+s1);
            rmatrixrighttrsm(m, s2, a, i1+s1, j1+s1, isupper, isunit, optype, x, i2, j2+s1);
        }
    }
}


/*************************************************************************
Single-threaded stub. HPC ALGLIB replaces it by multithreaded code.
*************************************************************************/
public static void _pexec_rmatrixrighttrsm(int m, int n, double[,] a, int i1, int j1,
    bool isupper, bool isunit, int optype, double[,] x, int i2, int j2)
{
    // Forwards every argument unchanged to the serial routine.
    rmatrixrighttrsm(m, n, a, i1, j1, isupper, isunit, optype, x, i2, j2);
}


/*************************************************************************
This subroutine calculates op(A^-1)*X where:
* X is MxN general matrix
* A is MxM upper/lower triangular/unitriangular matrix
* "op" may be identity transformation, transposition

Multiplication result replaces X.
Cache-oblivious algorithm is used.

COMMERCIAL EDITION OF ALGLIB:

  ! Commercial version of ALGLIB includes two important improvements of
  ! this function, which can be used from C++ and C#:
  ! * Intel MKL support (lightweight Intel MKL is shipped with ALGLIB)
  ! * multicore support
  !
  ! Intel MKL gives approximately constant (with respect to number of
  ! worker threads) acceleration factor which depends on CPU being used,
  ! problem size and "baseline" ALGLIB edition which is used for
  !
comparison.
  !
  ! Say, on SSE2-capable CPU with N=1024, HPC ALGLIB will be:
  ! * about 2-3x faster than ALGLIB for C++ without MKL
  ! * about 7-10x faster than "pure C#" edition of ALGLIB
  ! Difference in performance will be more striking on newer CPU's with
  ! support for newer SIMD instructions.
  !
  ! Commercial edition of ALGLIB also supports multithreaded acceleration
  ! of this function. Because starting/stopping worker thread always
  ! involves some overhead, parallelism starts to be profitable for N's
  ! larger than 128.
  !
  ! In order to use multicore features you have to:
  ! * use commercial version of ALGLIB
  ! * call this function with "smp_" prefix, which indicates that
  !   multicore code will be used (for multicore support)
  !
  ! We recommend you to read 'Working with commercial version' section of
  ! ALGLIB Reference Manual in order to find out how to use performance-
  ! related features provided by commercial edition of ALGLIB.

INPUT PARAMETERS
    N       -   matrix size, N>=0
    M       -   matrix size, M>=0
    A       -   matrix, actual matrix is stored in A[I1:I1+M-1,J1:J1+M-1]
    I1      -   submatrix offset
    J1      -   submatrix offset
    IsUpper -   whether matrix is upper triangular
    IsUnit  -   whether matrix is unitriangular
    OpType  -   transformation type:
                * 0 - no transformation
                * 1 - transposition
    X       -   matrix, actual matrix is stored in X[I2:I2+M-1,J2:J2+N-1]
    I2      -   submatrix offset
    J2      -   submatrix offset

  -- ALGLIB routine --
     15.12.2009
     Bochkanov Sergey
*************************************************************************/
public static void rmatrixlefttrsm(int m, int n, double[,] a, int i1, int j1,
    bool isupper, bool isunit, int optype, double[,] x, int i2, int j2)
{
    int s1 = 0;
    int s2 = 0;
    int bs = ablasblocksize(a);

    // Basecase: the MKL hook is tried first (true means nothing is left
    // to do here), then the non-recursive kernel for small problems.
    if (ablasmkl.rmatrixlefttrsmmkl(m, n, a, i1, j1, isupper, isunit, optype, x, i2, j2))
    {
        return;
    }
    if (m <= bs && n <= bs)
    {
        rmatrixlefttrsm2(m, n, a, i1, j1, isupper, isunit, optype, x, i2, j2);
        return;
    }

    if (n >= m)
    {
        // Split X by columns: both column blocks are solved against the whole A.
        ablassplitlength(x, n, ref s1, ref s2);
        rmatrixlefttrsm(m, s1, a, i1, j1, isupper, isunit, optype, x, i2, j2);
        rmatrixlefttrsm(m, s2, a, i1, j1, isupper, isunit, optype, x, i2, j2+s1);
        return;
    }

    // Split A into diagonal blocks A1 (s1 x s1) and A2 (s2 x s2) plus one
    // off-diagonal block, and X into row blocks X1, X2. One row block is
    // solved, its contribution is subtracted from the other block via
    // GEMM, and then the other block is solved.
    ablassplitlength(a, m, ref s1, ref s2);
    if (isupper)
    {
        if (optype == 0)
        {
            // op(A) is upper triangular: X2 first, then X1.
            rmatrixlefttrsm(s2, n, a, i1+s1, j1+s1, isupper, isunit, optype, x, i2+s1, j2);
            rmatrixgemm(s1, n, s2, -1.0, a, i1, j1+s1, 0, x, i2+s1, j2, 0, 1.0, x, i2, j2);
            rmatrixlefttrsm(s1, n, a, i1, j1, isupper, isunit, optype, x, i2, j2);
        }
        else
        {
            // op(A) is lower triangular (transposed upper): X1 first, then X2.
            rmatrixlefttrsm(s1, n, a, i1, j1, isupper, isunit, optype, x, i2, j2);
            rmatrixgemm(s2, n, s1, -1.0, a, i1, j1+s1, optype, x, i2, j2, 0, 1.0, x, i2+s1, j2);
            rmatrixlefttrsm(s2, n, a, i1+s1, j1+s1, isupper, isunit, optype, x, i2+s1, j2);
        }
    }
    else
    {
        if (optype == 0)
        {
            // op(A) is lower triangular: X1 first, then X2.
            rmatrixlefttrsm(s1, n, a, i1, j1, isupper, isunit, optype, x, i2, j2);
            rmatrixgemm(s2, n, s1, -1.0, a, i1+s1, j1, 0, x, i2, j2, 0, 1.0, x, i2+s1, j2);
            rmatrixlefttrsm(s2, n, a, i1+s1, j1+s1, isupper, isunit, optype, x, i2+s1, j2);
        }
        else
        {
            // op(A) is upper triangular (transposed lower): X2 first, then X1.
            rmatrixlefttrsm(s2, n, a, i1+s1, j1+s1, isupper, isunit, optype, x, i2+s1, j2);
            rmatrixgemm(s1, n, s2, -1.0, a, i1+s1, j1, optype, x, i2+s1, j2, 0, 1.0, x, i2, j2);
            rmatrixlefttrsm(s1, n, a, i1, j1, isupper, isunit, optype, x, i2, j2);
        }
    }
}


/*************************************************************************
Single-threaded stub. HPC ALGLIB replaces it by multithreaded code.
*************************************************************************/
public static void _pexec_rmatrixlefttrsm(int m, int n, double[,] a, int i1, int j1,
    bool isupper, bool isunit, int optype, double[,] x, int i2, int j2)
{
    // Forwards every argument unchanged to the serial routine.
    rmatrixlefttrsm(m, n, a, i1, j1, isupper, isunit, optype, x, i2, j2);
}


/*************************************************************************
This subroutine calculates  C=alpha*A*A^H+beta*C  or  C=alpha*A^H*A+beta*C
where:
* C is NxN Hermitian matrix given by its upper/lower triangle
* A is NxK matrix when A*A^H is calculated, KxN matrix otherwise

Additional info:
* cache-oblivious algorithm is used.
* multiplication result replaces C. If Beta=0, C elements are not used in
  calculations (not multiplied by zero - just not referenced)
* if Alpha=0, A is not used (not multiplied by zero - just not referenced)
* if both Beta and Alpha are zero, C is filled by zeros.

COMMERCIAL EDITION OF ALGLIB:

  ! Commercial version of ALGLIB includes two important improvements of
  ! this function, which can be used from C++ and C#:
  ! * Intel MKL support (lightweight Intel MKL is shipped with ALGLIB)
  ! * multicore support
  !
  ! Intel MKL gives approximately constant (with respect to number of
  ! worker threads) acceleration factor which depends on CPU being used,
  ! problem size and "baseline" ALGLIB edition which is used for
  ! comparison.
  !
  ! Say, on SSE2-capable CPU with N=1024, HPC ALGLIB will be:
  ! * about 2-3x faster than ALGLIB for C++ without MKL
  ! * about 7-10x faster than "pure C#" edition of ALGLIB
  ! Difference in performance will be more striking on newer CPU's with
  ! support for newer SIMD instructions.
  !
  ! Commercial edition of ALGLIB also supports multithreaded acceleration
  ! of this function. Because starting/stopping worker thread always
  ! involves some overhead, parallelism starts to be profitable for N's
  ! larger than 128.
  !
  ! In order to use multicore features you have to:
  ! * use commercial version of ALGLIB
  !
* call this function with "smp_" prefix, which indicates that
  !   multicore code will be used (for multicore support)
  !
  ! We recommend you to read 'Working with commercial version' section of
  ! ALGLIB Reference Manual in order to find out how to use performance-
  ! related features provided by commercial edition of ALGLIB.

INPUT PARAMETERS
    N       -   matrix size, N>=0
    K       -   matrix size, K>=0
    Alpha   -   coefficient
    A       -   matrix
    IA      -   submatrix offset (row index)
    JA      -   submatrix offset (column index)
    OpTypeA -   multiplication type:
                * 0 - A*A^H is calculated
                * 2 - A^H*A is calculated
    Beta    -   coefficient
    C       -   preallocated input/output matrix
    IC      -   submatrix offset (row index)
    JC      -   submatrix offset (column index)
    IsUpper -   whether upper or lower triangle of C is updated;
                this function updates only one half of C, leaving
                other half unchanged (not referenced at all).

  -- ALGLIB routine --
     16.12.2009
     Bochkanov Sergey
*************************************************************************/
public static void cmatrixherk(int n, int k, double alpha, complex[,] a, int ia, int ja,
    int optypea, double beta, complex[,] c, int ic, int jc, bool isupper)
{
    int s1 = 0;
    int s2 = 0;
    int bs = ablascomplexblocksize(a);

    // Basecase: the MKL hook is tried first (true means nothing is left
    // to do here), then the non-recursive kernel for small problems.
    if (ablasmkl.cmatrixherkmkl(n, k, alpha, a, ia, ja, optypea, beta, c, ic, jc, isupper))
    {
        return;
    }
    if (n <= bs && k <= bs)
    {
        cmatrixherk2(n, k, alpha, a, ia, ja, optypea, beta, c, ic, jc, isupper);
        return;
    }

    if (k >= n)
    {
        // Split K: the first half applies the caller's beta to C, the
        // second half accumulates on top of it with beta=1. The K
        // dimension runs along columns of A for optypea==0 and along
        // rows otherwise.
        ablascomplexsplitlength(a, k, ref s1, ref s2);
        cmatrixherk(n, s1, alpha, a, ia, ja, optypea, beta, c, ic, jc, isupper);
        if (optypea == 0)
        {
            cmatrixherk(n, s2, alpha, a, ia, ja+s1, optypea, 1.0, c, ic, jc, isupper);
        }
        else
        {
            cmatrixherk(n, s2, alpha, a, ia+s1, ja, optypea, 1.0, c, ic, jc, isupper);
        }
        return;
    }

    // Split N: update the leading diagonal block of C, then the single
    // off-diagonal block on the requested side via GEMM, then the
    // trailing diagonal block.
    ablascomplexsplitlength(a, n, ref s1, ref s2);
    cmatrixherk(s1, k, alpha, a, ia, ja, optypea, beta, c, ic, jc, isupper);
    if (optypea == 0)
    {
        // A*A^H: the two halves of A are row blocks.
        if (isupper)
        {
            cmatrixgemm(s1, s2, k, alpha, a, ia, ja, 0, a, ia+s1, ja, 2, beta, c, ic, jc+s1);
        }
        else
        {
            cmatrixgemm(s2, s1, k, alpha, a, ia+s1, ja, 0, a, ia, ja, 2, beta, c, ic+s1, jc);
        }
        cmatrixherk(s2, k, alpha, a, ia+s1, ja, optypea, beta, c, ic+s1, jc+s1, isupper);
    }
    else
    {
        // A^H*A: the two halves of A are column blocks.
        if (isupper)
        {
            cmatrixgemm(s1, s2, k, alpha, a, ia, ja, 2, a, ia, ja+s1, 0, beta, c, ic, jc+s1);
        }
        else
        {
            cmatrixgemm(s2, s1, k, alpha, a, ia, ja+s1, 2, a, ia, ja, 0, beta, c, ic+s1, jc);
        }
        cmatrixherk(s2, k, alpha, a, ia, ja+s1, optypea, beta, c, ic+s1, jc+s1, isupper);
    }
}


/*************************************************************************
Single-threaded stub. HPC ALGLIB replaces it by multithreaded code.
*************************************************************************/
public static void _pexec_cmatrixherk(int n, int k, double alpha, complex[,] a, int ia, int ja,
    int optypea, double beta, complex[,] c, int ic, int jc, bool isupper)
{
    // Forwards every argument unchanged to the serial routine.
    cmatrixherk(n, k, alpha, a, ia, ja, optypea, beta, c, ic, jc, isupper);
}


/*************************************************************************
This subroutine calculates  C=alpha*A*A^T+beta*C  or  C=alpha*A^T*A+beta*C
where:
* C is NxN symmetric matrix given by its upper/lower triangle
* A is NxK matrix when A*A^T is calculated, KxN matrix otherwise

Additional info:
* cache-oblivious algorithm is used.
* multiplication result replaces C.
If Beta=0, C elements are not used in calculations (not multiplied by zero - just not referenced) * if Alpha=0, A is not used (not multiplied by zero - just not referenced) * if both Beta and Alpha are zero, C is filled by zeros. COMMERCIAL EDITION OF ALGLIB: ! Commercial version of ALGLIB includes two important improvements of ! this function, which can be used from C++ and C#: ! * Intel MKL support (lightweight Intel MKL is shipped with ALGLIB) ! * multicore support ! ! Intel MKL gives approximately constant (with respect to number of ! worker threads) acceleration factor which depends on CPU being used, ! problem size and "baseline" ALGLIB edition which is used for ! comparison. ! ! Say, on SSE2-capable CPU with N=1024, HPC ALGLIB will be: ! * about 2-3x faster than ALGLIB for C++ without MKL ! * about 7-10x faster than "pure C#" edition of ALGLIB ! Difference in performance will be more striking on newer CPU's with ! support for newer SIMD instructions. ! ! Commercial edition of ALGLIB also supports multithreaded acceleration ! of this function. Because starting/stopping worker thread always ! involves some overhead, parallelism starts to be profitable for N's ! larger than 128. ! ! In order to use multicore features you have to: ! * use commercial version of ALGLIB ! * call this function with "smp_" prefix, which indicates that ! multicore code will be used (for multicore support) ! ! We recommend you to read 'Working with commercial version' section of ! ALGLIB Reference Manual in order to find out how to use performance- ! related features provided by commercial edition of ALGLIB. 
INPUT PARAMETERS
    N       -   matrix size, N>=0
    K       -   matrix size, K>=0
    Alpha   -   coefficient
    A       -   matrix
    IA      -   submatrix offset (row index)
    JA      -   submatrix offset (column index)
    OpTypeA -   multiplication type:
                * 0 - A*A^T is calculated
                * 2 - A^T*A is calculated
    Beta    -   coefficient
    C       -   preallocated input/output matrix
    IC      -   submatrix offset (row index)
    JC      -   submatrix offset (column index)
    IsUpper -   whether C is upper triangular or lower triangular

  -- ALGLIB routine --
     16.12.2009
     Bochkanov Sergey
*************************************************************************/
public static void rmatrixsyrk(int n, int k, double alpha, double[,] a, int ia, int ja,
    int optypea, double beta, double[,] c, int ic, int jc, bool isupper)
{
    int s1 = 0;
    int s2 = 0;
    int bs = ablasblocksize(a);

    // Basecase: the MKL hook is tried first (true means nothing is left
    // to do here), then the non-recursive kernel for small problems.
    if (ablasmkl.rmatrixsyrkmkl(n, k, alpha, a, ia, ja, optypea, beta, c, ic, jc, isupper))
    {
        return;
    }
    if (n <= bs && k <= bs)
    {
        rmatrixsyrk2(n, k, alpha, a, ia, ja, optypea, beta, c, ic, jc, isupper);
        return;
    }

    if (k >= n)
    {
        // Split K: the first half applies the caller's beta to C, the
        // second half accumulates on top of it with beta=1. The K
        // dimension runs along columns of A for optypea==0 and along
        // rows otherwise.
        ablassplitlength(a, k, ref s1, ref s2);
        rmatrixsyrk(n, s1, alpha, a, ia, ja, optypea, beta, c, ic, jc, isupper);
        if (optypea == 0)
        {
            rmatrixsyrk(n, s2, alpha, a, ia, ja+s1, optypea, 1.0, c, ic, jc, isupper);
        }
        else
        {
            rmatrixsyrk(n, s2, alpha, a, ia+s1, ja, optypea, 1.0, c, ic, jc, isupper);
        }
        return;
    }

    // Split N: update the leading diagonal block of C, then the single
    // off-diagonal block on the requested side via GEMM, then the
    // trailing diagonal block.
    ablassplitlength(a, n, ref s1, ref s2);
    rmatrixsyrk(s1, k, alpha, a, ia, ja, optypea, beta, c, ic, jc, isupper);
    if (optypea == 0)
    {
        // A*A^T: the two halves of A are row blocks.
        if (isupper)
        {
            rmatrixgemm(s1, s2, k, alpha, a, ia, ja, 0, a, ia+s1, ja, 1, beta, c, ic, jc+s1);
        }
        else
        {
            rmatrixgemm(s2, s1, k, alpha, a, ia+s1, ja, 0, a, ia, ja, 1, beta, c, ic+s1, jc);
        }
        rmatrixsyrk(s2, k, alpha, a, ia+s1, ja, optypea, beta, c, ic+s1, jc+s1, isupper);
    }
    else
    {
        // A^T*A: the two halves of A are column blocks.
        if (isupper)
        {
            rmatrixgemm(s1, s2, k, alpha, a, ia, ja, 1, a, ia, ja+s1, 0, beta, c, ic, jc+s1);
        }
        else
        {
            rmatrixgemm(s2, s1, k, alpha, a, ia, ja+s1, 1, a, ia, ja, 0, beta, c, ic+s1, jc);
        }
        rmatrixsyrk(s2, k, alpha, a, ia, ja+s1, optypea, beta, c, ic+s1, jc+s1, isupper);
    }
}


/*************************************************************************
Single-threaded stub. HPC ALGLIB replaces it by multithreaded code.
*************************************************************************/
public static void _pexec_rmatrixsyrk(int n, int k, double alpha, double[,] a, int ia, int ja,
    int optypea, double beta, double[,] c, int ic, int jc, bool isupper)
{
    // Forwards every argument unchanged to the serial routine.
    rmatrixsyrk(n, k, alpha, a, ia, ja, optypea, beta, c, ic, jc, isupper);
}


/*************************************************************************
This subroutine calculates C = alpha*op1(A)*op2(B) +beta*C where:
* C is MxN general matrix
* op1(A) is MxK matrix
* op2(B) is KxN matrix
* "op" may be identity transformation, transposition, conjugate transposition

Additional info:
* cache-oblivious algorithm is used.
* multiplication result replaces C. If Beta=0, C elements are not used in
  calculations (not multiplied by zero - just not referenced)
* if Alpha=0, A is not used (not multiplied by zero - just not referenced)
* if both Beta and Alpha are zero, C is filled by zeros.

COMMERCIAL EDITION OF ALGLIB:

  ! Commercial version of ALGLIB includes two important improvements of
  ! this function, which can be used from C++ and C#:
  ! * Intel MKL support (lightweight Intel MKL is shipped with ALGLIB)
  ! * multicore support
  !
  !
Intel MKL gives approximately constant (with respect to number of ! worker threads) acceleration factor which depends on CPU being used, ! problem size and "baseline" ALGLIB edition which is used for ! comparison. ! ! Say, on SSE2-capable CPU with N=1024, HPC ALGLIB will be: ! * about 2-3x faster than ALGLIB for C++ without MKL ! * about 7-10x faster than "pure C#" edition of ALGLIB ! Difference in performance will be more striking on newer CPU's with ! support for newer SIMD instructions. ! ! Commercial edition of ALGLIB also supports multithreaded acceleration ! of this function. Because starting/stopping worker thread always ! involves some overhead, parallelism starts to be profitable for N's ! larger than 128. ! ! In order to use multicore features you have to: ! * use commercial version of ALGLIB ! * call this function with "smp_" prefix, which indicates that ! multicore code will be used (for multicore support) ! ! We recommend you to read 'Working with commercial version' section of ! ALGLIB Reference Manual in order to find out how to use performance- ! related features provided by commercial edition of ALGLIB. IMPORTANT: This function does NOT preallocate output matrix C, it MUST be preallocated by caller prior to calling this function. In case C does not have enough space to store result, exception will be generated. 
INPUT PARAMETERS
    M       -   matrix size, M>0
    N       -   matrix size, N>0
    K       -   matrix size, K>0
    Alpha   -   coefficient
    A       -   matrix
    IA      -   submatrix offset
    JA      -   submatrix offset
    OpTypeA -   transformation type:
                * 0 - no transformation
                * 1 - transposition
                * 2 - conjugate transposition
    B       -   matrix
    IB      -   submatrix offset
    JB      -   submatrix offset
    OpTypeB -   transformation type:
                * 0 - no transformation
                * 1 - transposition
                * 2 - conjugate transposition
    Beta    -   coefficient
    C       -   matrix (PREALLOCATED, large enough to store result)
    IC      -   submatrix offset
    JC      -   submatrix offset

  -- ALGLIB routine --
     16.12.2009
     Bochkanov Sergey
*************************************************************************/
public static void cmatrixgemm(int m, int n, int k, complex alpha,
    complex[,] a, int ia, int ja, int optypea,
    complex[,] b, int ib, int jb, int optypeb,
    complex beta, complex[,] c, int ic, int jc)
{
    int s1 = 0;
    int s2 = 0;
    int bs = ablascomplexblocksize(a);

    // Basecase: the MKL hook is tried first (true means nothing is left
    // to do here), then the ablasf kernel once all three dimensions fit
    // into one block.
    if (ablasmkl.cmatrixgemmmkl(m, n, k, alpha, a, ia, ja, optypea, b, ib, jb, optypeb, beta, c, ic, jc))
    {
        return;
    }
    if ((m <= bs && n <= bs) && k <= bs)
    {
        ablasf.cmatrixgemmk(m, n, k, alpha, a, ia, ja, optypea, b, ib, jb, optypeb, beta, c, ic, jc);
        return;
    }

    //
    // SMP support is turned on when M or N are larger than some boundary value.
    // Magnitude of K is not taken into account because splitting on K does not
    // allow us to spawn child tasks.
    //

    if (m >= n && m >= k)
    {
        // M is the largest dimension: split the rows of op(A) and of C.
        // The M dimension runs along rows of A for optypea==0 and along
        // columns otherwise.
        ablascomplexsplitlength(a, m, ref s1, ref s2);
        cmatrixgemm(s1, n, k, alpha, a, ia, ja, optypea, b, ib, jb, optypeb, beta, c, ic, jc);
        if (optypea == 0)
        {
            cmatrixgemm(s2, n, k, alpha, a, ia+s1, ja, optypea, b, ib, jb, optypeb, beta, c, ic+s1, jc);
        }
        else
        {
            cmatrixgemm(s2, n, k, alpha, a, ia, ja+s1, optypea, b, ib, jb, optypeb, beta, c, ic+s1, jc);
        }
        return;
    }

    if (n >= m && n >= k)
    {
        // N is the largest dimension: split the columns of op(B) and of
        // C. The N dimension runs along columns of B for optypeb==0 and
        // along rows otherwise.
        ablascomplexsplitlength(a, n, ref s1, ref s2);
        cmatrixgemm(m, s1, k, alpha, a, ia, ja, optypea, b, ib, jb, optypeb, beta, c, ic, jc);
        if (optypeb == 0)
        {
            cmatrixgemm(m, s2, k, alpha, a, ia, ja, optypea, b, ib, jb+s1, optypeb, beta, c, ic, jc+s1);
        }
        else
        {
            cmatrixgemm(m, s2, k, alpha, a, ia, ja, optypea, b, ib+s1, jb, optypeb, beta, c, ic, jc+s1);
        }
        return;
    }

    // K is the largest dimension: serial split. The first half applies
    // the caller's beta to C; the second half accumulates on top of it
    // with beta=1. The K dimension runs along columns of A / rows of B
    // when the operand is not transformed, and the other way round when
    // it is.
    ablascomplexsplitlength(a, k, ref s1, ref s2);
    int ia2 = (optypea == 0) ? ia : ia+s1;
    int ja2 = (optypea == 0) ? ja+s1 : ja;
    int ib2 = (optypeb == 0) ? ib+s1 : ib;
    int jb2 = (optypeb == 0) ? jb : jb+s1;
    cmatrixgemm(m, n, s1, alpha, a, ia, ja, optypea, b, ib, jb, optypeb, beta, c, ic, jc);
    cmatrixgemm(m, n, s2, alpha, a, ia2, ja2, optypea, b, ib2, jb2, optypeb, 1.0, c, ic, jc);
}
/************************************************************************* Single-threaded stub. HPC ALGLIB replaces it by multithreaded code. As written here it only forwards all sixteen arguments, unchanged and in the same order, to cmatrixgemm() and returns nothing. *************************************************************************/ public static void _pexec_cmatrixgemm(int m, int n, int k, complex alpha, complex[,] a, int ia, int ja, int optypea, complex[,] b, int ib, int jb, int optypeb, complex beta, complex[,] c, int ic, int jc) { cmatrixgemm(m,n,k,alpha,a,ia,ja,optypea,b,ib,jb,optypeb,beta,c,ic,jc); } /************************************************************************* This subroutine calculates C = alpha*op1(A)*op2(B) +beta*C where: * C is MxN general matrix * op1(A) is MxK matrix * op2(B) is KxN matrix * "op" may be identity transformation, transposition Additional info: * cache-oblivious algorithm is used. * multiplication result replaces C. If Beta=0, C elements are not used in calculations (not multiplied by zero - just not referenced) * if Alpha=0, A is not used (not multiplied by zero - just not referenced) * if both Beta and Alpha are zero, C is filled by zeros. COMMERCIAL EDITION OF ALGLIB: ! Commercial version of ALGLIB includes two important improvements of ! this function, which can be used from C++ and C#: ! * Intel MKL support (lightweight Intel MKL is shipped with ALGLIB) ! * multicore support ! ! Intel MKL gives approximately constant (with respect to number of ! worker threads) acceleration factor which depends on CPU being used, ! problem size and "baseline" ALGLIB edition which is used for ! comparison. ! ! Say, on SSE2-capable CPU with N=1024, HPC ALGLIB will be: ! * about 2-3x faster than ALGLIB for C++ without MKL ! * about 7-10x faster than "pure C#" edition of ALGLIB ! Difference in performance will be more striking on newer CPU's with ! support for newer SIMD instructions. ! ! Commercial edition of ALGLIB also supports multithreaded acceleration ! of this function. Because starting/stopping worker thread always ! 
involves some overhead, parallelism starts to be profitable for N's
  ! larger than 128.
  !
  ! In order to use multicore features you have to:
  ! * use commercial version of ALGLIB
  ! * call this function with "smp_" prefix, which indicates that
  !   multicore code will be used (for multicore support)
  !
  ! We recommend you to read 'Working with commercial version' section of
  ! ALGLIB Reference Manual in order to find out how to use performance-
  ! related features provided by commercial edition of ALGLIB.

IMPORTANT:

This function does NOT preallocate output matrix C, it MUST be preallocated
by caller prior to calling this function. In case C does not have enough
space to store result, exception will be generated.

INPUT PARAMETERS
    M       -   matrix size, M>0
    N       -   matrix size, N>0
    K       -   matrix size, K>0
    Alpha   -   coefficient
    A       -   matrix
    IA      -   submatrix offset
    JA      -   submatrix offset
    OpTypeA -   transformation type:
                * 0 - no transformation
                * 1 - transposition
    B       -   matrix
    IB      -   submatrix offset
    JB      -   submatrix offset
    OpTypeB -   transformation type:
                * 0 - no transformation
                * 1 - transposition
    Beta    -   coefficient
    C       -   PREALLOCATED output matrix, large enough to store result
    IC      -   submatrix offset
    JC      -   submatrix offset

  -- ALGLIB routine --
     2009-2013
     Bochkanov Sergey
*************************************************************************/
public static void rmatrixgemm(int m, int n, int k, double alpha, double[,] a, int ia, int ja, int optypea, double[,] b, int ib, int jb, int optypeb, double beta, double[,] c, int ic, int jc)
{
    int lead = 0;
    int rest = 0;
    int blk = ablasblocksize(a);

    //
    // Argument validation: only OpType 0/1 is accepted for real matrices,
    // and the MxN target window must lie inside C.
    //
    alglib.ap.assert(optypea==0 || optypea==1, "RMatrixGEMM: incorrect OpTypeA (must be 0 or 1)");
    alglib.ap.assert(optypeb==0 || optypeb==1, "RMatrixGEMM: incorrect OpTypeB (must be 0 or 1)");
    alglib.ap.assert(ic+m<=alglib.ap.rows(c), "RMatrixGEMM: incorect size of output matrix C");
    alglib.ap.assert(jc+n<=alglib.ap.cols(c), "RMatrixGEMM: incorect size of output matrix C");

    //
    // Leaf cases. If the MKL wrapper returns true there is nothing left
    // to do; otherwise the ALGLIB basecase kernel is used as soon as all
    // three dimensions fit into one block.
    //
    if( ablasmkl.rmatrixgemmmkl(m, n, k, alpha, a, ia, ja, optypea, b, ib, jb, optypeb, beta, c, ic, jc) )
    {
        return;
    }
    if( m<=blk && n<=blk && k<=blk )
    {
        ablasf.rmatrixgemmk(m, n, k, alpha, a, ia, ja, optypea, b, ib, jb, optypeb, beta, c, ic, jc);
        return;
    }

    //
    // From here on the problem is halved along its largest dimension.
    // For an untransposed operand (OpType=0) the second half is reached
    // by moving along one index of the stored matrix, for a transposed
    // operand by moving along the other one.
    //
    bool plaina = optypea==0;
    bool plainb = optypeb==0;

    if( m>=n && m>=k )
    {
        //
        // Split on M: A*B = (A1 A2)^T*B.
        // The two calls write to rows IC.. and IC+lead.. of C respectively.
        //
        ablassplitlength(a, m, ref lead, ref rest);
        rmatrixgemm(lead, n, k, alpha, a, ia, ja, optypea, b, ib, jb, optypeb, beta, c, ic, jc);
        rmatrixgemm(rest, n, k, alpha, a, plaina ? ia+lead : ia, plaina ? ja : ja+lead, optypea, b, ib, jb, optypeb, beta, c, ic+lead, jc);
        return;
    }
    if( n>=m && n>=k )
    {
        //
        // Split on N: A*B = A*(B1 B2).
        // The two calls write to columns JC.. and JC+lead.. of C respectively.
        //
        ablassplitlength(a, n, ref lead, ref rest);
        rmatrixgemm(m, lead, k, alpha, a, ia, ja, optypea, b, ib, jb, optypeb, beta, c, ic, jc);
        rmatrixgemm(m, rest, k, alpha, a, ia, ja, optypea, b, plainb ? ib : ib+lead, plainb ? jb+lead : jb, optypeb, beta, c, ic, jc+lead);
        return;
    }

    //
    // Split on K: A*B = (A1 A2)*(B1 B2)^T.
    // Both calls target the same part of C, so they are strictly ordered:
    // the first one applies Beta, the second one accumulates on top of it
    // with a coefficient of 1.
    //
    ablassplitlength(a, k, ref lead, ref rest);
    rmatrixgemm(m, n, lead, alpha, a, ia, ja, optypea, b, ib, jb, optypeb, beta, c, ic, jc);
    rmatrixgemm(m, n, rest, alpha, a, plaina ? ia : ia+lead, plaina ? ja+lead : ja, optypea, b, plainb ? ib+lead : ib, plainb ? jb : jb+lead, optypeb, 1.0, c, ic, jc);
    return;
}


/*************************************************************************
Single-threaded stub. HPC ALGLIB replaces it by multithreaded code.
Here it is a plain pass-through to rmatrixgemm().
*************************************************************************/
public static void _pexec_rmatrixgemm(int m, int n, int k, double alpha, double[,] a, int ia, int ja, int optypea, double[,] b, int ib, int jb, int optypeb, double beta, double[,] c, int ic, int jc)
{
    rmatrixgemm(m, n, k, alpha, a, ia, ja, optypea, b, ib, jb, optypeb, beta, c, ic, jc);
}


/*************************************************************************
Backward-compatibility alias for CMatrixHERK().

The operation is a HErmitian rank-K update, so the "SYRK" (SYmmetric) name
is a misnomer; the function is kept only so that older callers keep
working. All arguments are handed to cmatrixherk() unchanged.

  -- ALGLIB routine --
     16.12.2009
     Bochkanov Sergey
*************************************************************************/
public static void cmatrixsyrk(int n, int k, double alpha, complex[,] a, int ia, int ja, int optypea, double beta, complex[,] c, int ic, int jc, bool isupper)
{
    cmatrixherk(n, k, alpha, a, ia, ja, optypea, beta, c, ic, jc, isupper);
}


/*************************************************************************
Single-threaded stub. HPC ALGLIB replaces it by multithreaded code.
*************************************************************************/ public static void _pexec_cmatrixsyrk(int n, int k, double alpha, complex[,] a, int ia, int ja, int optypea, double beta, complex[,] c, int ic, int jc, bool isupper) { cmatrixsyrk(n,k,alpha,a,ia,ja,optypea,beta,c,ic,jc,isupper); } /************************************************************************* Internal split-length helper: splits N into N1+N2 (N1+N2=N) for a given block size NB. Three cases are handled: (a) N<=NB - no split, N1=N and N2=0; (b) N>NB and N is not a multiple of NB - the remainder is split off, N2=N mod NB and N1=N-N2; (c) N>NB and N is a multiple of NB - N2=N div 2 and N1=N-N2, after which N1 is rounded up to the next multiple of NB (if it is not one already) and N2 is reduced by the same amount. NB is used as a divisor and is not validated here. -- ALGLIB routine -- 15.12.2009 Bochkanov Sergey *************************************************************************/ private static void ablasinternalsplitlength(int n, int nb, ref int n1, ref int n2) { int r = 0; n1 = 0; n2 = 0; if( n<=nb ) { // // Block size, no further splitting // n1 = n; n2 = 0; } else { // // Greater than block size // if( n%nb!=0 ) { // // Split remainder // n2 = n%nb; n1 = n-n2; } else { // // Split on block boundaries // n2 = n/2; n1 = n-n2; if( n1%nb==0 ) { return; } r = nb-n1%nb; n1 = n1+r; n2 = n2-r; } } } /************************************************************************* Level 2 variant of CMatrixRightTRSM. Returns at once when N*M=0, then gives ablasf.cmatrixrighttrsmf() a chance to do the work, and only runs the loops below when that call returns false. X is updated in place. NOTE(review): in this copy of the file the body of this function looks truncated - fragments such as "if( j=0; j--)" are not valid code, as if text between a "less than" and a "greater than" sign had been stripped. Restore the function from a pristine ALGLIB 3.9.0 source before relying on it. *************************************************************************/ private static void cmatrixrighttrsm2(int m, int n, complex[,] a, int i1, int j1, bool isupper, bool isunit, int optype, complex[,] x, int i2, int j2) { int i = 0; int j = 0; complex vc = 0; complex vd = 0; int i_ = 0; int i1_ = 0; // // Special case // if( n*m==0 ) { return; } // // Try to call fast TRSM // if( ablasf.cmatrixrighttrsmf(m, n, a, i1, j1, isupper, isunit, optype, x, i2, j2) ) { return; } // // General case // if( isupper ) { // // Upper triangular matrix // if( optype==0 ) { // // X*A^(-1) // for(i=0; i<=m-1; i++) { for(j=0; j<=n-1; j++) { if( isunit ) { vd = 1; } else { vd = a[i1+j,j1+j]; } x[i2+i,j2+j] = x[i2+i,j2+j]/vd; if( j=0; j--) { vc = 0; vd = 1; if( j=0; j--) { vc = 0; vd = 1; if( j=0; j--) { if( isunit ) { vd = 1; } else { vd = a[i1+j,j1+j]; } x[i2+i,j2+j] = x[i2+i,j2+j]/vd; if( j>0 ) { vc = x[i2+i,j2+j]; i1_ = (j1) - 
(j2); for(i_=j2; i_<=j2+j-1;i_++) { x[i2+i,i_] = x[i2+i,i_] - vc*a[i1+j,i_+i1_]; } } } } return; } if( optype==1 ) { // // X*A^(-T) // for(i=0; i<=m-1; i++) { for(j=0; j<=n-1; j++) { vc = 0; vd = 1; if( j>0 ) { i1_ = (j1)-(j2); vc = 0.0; for(i_=j2; i_<=j2+j-1;i_++) { vc += x[i2+i,i_]*a[i1+j,i_+i1_]; } } if( !isunit ) { vd = a[i1+j,j1+j]; } x[i2+i,j2+j] = (x[i2+i,j2+j]-vc)/vd; } } return; } if( optype==2 ) { // // X*A^(-H) // for(i=0; i<=m-1; i++) { for(j=0; j<=n-1; j++) { vc = 0; vd = 1; if( j>0 ) { i1_ = (j1)-(j2); vc = 0.0; for(i_=j2; i_<=j2+j-1;i_++) { vc += x[i2+i,i_]*math.conj(a[i1+j,i_+i1_]); } } if( !isunit ) { vd = math.conj(a[i1+j,j1+j]); } x[i2+i,j2+j] = (x[i2+i,j2+j]-vc)/vd; } } return; } } } /************************************************************************* Level-2 subroutine: complex left triangular solve. The rows I2..I2+M-1, columns J2..J2+N-1 of X are overwritten in place with A^(-1)*X, A^(-T)*X or A^(-H)*X (OpType=0, 1 or 2), where the triangular matrix is read from A starting at (I1,J1); IsUpper selects the triangle and IsUnit makes the code use 1 instead of the stored diagonal. Returns at once when N*M=0, and ablasf.cmatrixlefttrsmf() is tried before the generic loops. Reading hint: in the lower-triangular OpType=0 branch the diagonal is addressed as a[i1+j,j1+j] right after the inner loop over j; that loop always leaves j equal to i, so this is the diagonal element of row i. *************************************************************************/ private static void cmatrixlefttrsm2(int m, int n, complex[,] a, int i1, int j1, bool isupper, bool isunit, int optype, complex[,] x, int i2, int j2) { int i = 0; int j = 0; complex vc = 0; complex vd = 0; int i_ = 0; // // Special case // if( n*m==0 ) { return; } // // Try to call fast TRSM // if( ablasf.cmatrixlefttrsmf(m, n, a, i1, j1, isupper, isunit, optype, x, i2, j2) ) { return; } // // General case // if( isupper ) { // // Upper triangular matrix // if( optype==0 ) { // // A^(-1)*X // for(i=m-1; i>=0; i--) { for(j=i+1; j<=m-1; j++) { vc = a[i1+i,j1+j]; for(i_=j2; i_<=j2+n-1;i_++) { x[i2+i,i_] = x[i2+i,i_] - vc*x[i2+j,i_]; } } if( !isunit ) { vd = 1/a[i1+i,j1+i]; for(i_=j2; i_<=j2+n-1;i_++) { x[i2+i,i_] = vd*x[i2+i,i_]; } } } return; } if( optype==1 ) { // // A^(-T)*X // for(i=0; i<=m-1; i++) { if( isunit ) { vd = 1; } else { vd = 1/a[i1+i,j1+i]; } for(i_=j2; i_<=j2+n-1;i_++) { x[i2+i,i_] = vd*x[i2+i,i_]; } for(j=i+1; j<=m-1; j++) { vc = a[i1+i,j1+j]; for(i_=j2; i_<=j2+n-1;i_++) { x[i2+j,i_] = x[i2+j,i_] - vc*x[i2+i,i_]; } } } return; } if( optype==2 ) { // // A^(-H)*X // for(i=0; i<=m-1; i++) 
{ if( isunit ) { vd = 1; } else { vd = 1/math.conj(a[i1+i,j1+i]); } for(i_=j2; i_<=j2+n-1;i_++) { x[i2+i,i_] = vd*x[i2+i,i_]; } for(j=i+1; j<=m-1; j++) { vc = math.conj(a[i1+i,j1+j]); for(i_=j2; i_<=j2+n-1;i_++) { x[i2+j,i_] = x[i2+j,i_] - vc*x[i2+i,i_]; } } } return; } } else { // // Lower triangular matrix // if( optype==0 ) { // // A^(-1)*X // for(i=0; i<=m-1; i++) { for(j=0; j<=i-1; j++) { vc = a[i1+i,j1+j]; for(i_=j2; i_<=j2+n-1;i_++) { x[i2+i,i_] = x[i2+i,i_] - vc*x[i2+j,i_]; } } if( isunit ) { vd = 1; } else { vd = 1/a[i1+j,j1+j]; } for(i_=j2; i_<=j2+n-1;i_++) { x[i2+i,i_] = vd*x[i2+i,i_]; } } return; } if( optype==1 ) { // // A^(-T)*X // for(i=m-1; i>=0; i--) { if( isunit ) { vd = 1; } else { vd = 1/a[i1+i,j1+i]; } for(i_=j2; i_<=j2+n-1;i_++) { x[i2+i,i_] = vd*x[i2+i,i_]; } for(j=i-1; j>=0; j--) { vc = a[i1+i,j1+j]; for(i_=j2; i_<=j2+n-1;i_++) { x[i2+j,i_] = x[i2+j,i_] - vc*x[i2+i,i_]; } } } return; } if( optype==2 ) { // // A^(-H)*X // for(i=m-1; i>=0; i--) { if( isunit ) { vd = 1; } else { vd = 1/math.conj(a[i1+i,j1+i]); } for(i_=j2; i_<=j2+n-1;i_++) { x[i2+i,i_] = vd*x[i2+i,i_]; } for(j=i-1; j>=0; j--) { vc = math.conj(a[i1+i,j1+j]); for(i_=j2; i_<=j2+n-1;i_++) { x[i2+j,i_] = x[i2+j,i_] - vc*x[i2+i,i_]; } } } return; } } } /************************************************************************* Level 2 subroutine: real right triangular solve (the in-body comments name the operations X*A^(-1) and X*A^(-T)); X is updated in place. Returns at once when N*M=0, and ablasf.rmatrixrighttrsmf() is tried before the generic loops. NOTE(review): in this copy of the file the body of this function looks truncated - fragments such as "if( j=0; j--)" are not valid code, as if text between a "less than" and a "greater than" sign had been stripped. Restore the function from a pristine ALGLIB 3.9.0 source before relying on it. -- ALGLIB routine -- 15.12.2009 Bochkanov Sergey *************************************************************************/ private static void rmatrixrighttrsm2(int m, int n, double[,] a, int i1, int j1, bool isupper, bool isunit, int optype, double[,] x, int i2, int j2) { int i = 0; int j = 0; double vr = 0; double vd = 0; int i_ = 0; int i1_ = 0; // // Special case // if( n*m==0 ) { return; } // // Try to use "fast" code // if( ablasf.rmatrixrighttrsmf(m, n, a, i1, j1, isupper, isunit, optype, x, i2, j2) ) { return; } // // General case // if( isupper ) { // // Upper triangular matrix // if( optype==0 ) { // // X*A^(-1) // for(i=0; i<=m-1; 
i++) { for(j=0; j<=n-1; j++) { if( isunit ) { vd = 1; } else { vd = a[i1+j,j1+j]; } x[i2+i,j2+j] = x[i2+i,j2+j]/vd; if( j=0; j--) { vr = 0; vd = 1; if( j=0; j--) { if( isunit ) { vd = 1; } else { vd = a[i1+j,j1+j]; } x[i2+i,j2+j] = x[i2+i,j2+j]/vd; if( j>0 ) { vr = x[i2+i,j2+j]; i1_ = (j1) - (j2); for(i_=j2; i_<=j2+j-1;i_++) { x[i2+i,i_] = x[i2+i,i_] - vr*a[i1+j,i_+i1_]; } } } } return; } if( optype==1 ) { // // X*A^(-T) // for(i=0; i<=m-1; i++) { for(j=0; j<=n-1; j++) { vr = 0; vd = 1; if( j>0 ) { i1_ = (j1)-(j2); vr = 0.0; for(i_=j2; i_<=j2+j-1;i_++) { vr += x[i2+i,i_]*a[i1+j,i_+i1_]; } } if( !isunit ) { vd = a[i1+j,j1+j]; } x[i2+i,j2+j] = (x[i2+i,j2+j]-vr)/vd; } } return; } } } /************************************************************************* Level 2 subroutine: real left triangular solve. The rows I2..I2+M-1, columns J2..J2+N-1 of X are overwritten in place with A^(-1)*X or A^(-T)*X (OpType=0 or 1), where the triangular matrix is read from A starting at (I1,J1); IsUpper selects the triangle and IsUnit makes the code use 1 instead of the stored diagonal. Returns at once when N=0 or M=0, and ablasf.rmatrixlefttrsmf() is tried before the generic loops. Reading hint: in the lower-triangular OpType=0 branch the diagonal is addressed as a[i1+j,j1+j] right after the inner loop over j; that loop always leaves j equal to i, so this is the diagonal element of row i. *************************************************************************/ private static void rmatrixlefttrsm2(int m, int n, double[,] a, int i1, int j1, bool isupper, bool isunit, int optype, double[,] x, int i2, int j2) { int i = 0; int j = 0; double vr = 0; double vd = 0; int i_ = 0; // // Special case // if( n==0 || m==0 ) { return; } // // Try fast code // if( ablasf.rmatrixlefttrsmf(m, n, a, i1, j1, isupper, isunit, optype, x, i2, j2) ) { return; } // // General case // if( isupper ) { // // Upper triangular matrix // if( optype==0 ) { // // A^(-1)*X // for(i=m-1; i>=0; i--) { for(j=i+1; j<=m-1; j++) { vr = a[i1+i,j1+j]; for(i_=j2; i_<=j2+n-1;i_++) { x[i2+i,i_] = x[i2+i,i_] - vr*x[i2+j,i_]; } } if( !isunit ) { vd = 1/a[i1+i,j1+i]; for(i_=j2; i_<=j2+n-1;i_++) { x[i2+i,i_] = vd*x[i2+i,i_]; } } } return; } if( optype==1 ) { // // A^(-T)*X // for(i=0; i<=m-1; i++) { if( isunit ) { vd = 1; } else { vd = 1/a[i1+i,j1+i]; } for(i_=j2; i_<=j2+n-1;i_++) { x[i2+i,i_] = vd*x[i2+i,i_]; } for(j=i+1; j<=m-1; j++) { vr = a[i1+i,j1+j]; for(i_=j2; i_<=j2+n-1;i_++) { x[i2+j,i_] = x[i2+j,i_] - vr*x[i2+i,i_]; } } } return; } } else { // // Lower triangular matrix // if( optype==0 ) { // // A^(-1)*X // for(i=0; 
i<=m-1; i++) { for(j=0; j<=i-1; j++) { vr = a[i1+i,j1+j]; for(i_=j2; i_<=j2+n-1;i_++) { x[i2+i,i_] = x[i2+i,i_] - vr*x[i2+j,i_]; } } if( isunit ) { vd = 1; } else { vd = 1/a[i1+j,j1+j]; } for(i_=j2; i_<=j2+n-1;i_++) { x[i2+i,i_] = vd*x[i2+i,i_]; } } return; } if( optype==1 ) { // // A^(-T)*X // for(i=m-1; i>=0; i--) { if( isunit ) { vd = 1; } else { vd = 1/a[i1+i,j1+i]; } for(i_=j2; i_<=j2+n-1;i_++) { x[i2+i,i_] = vd*x[i2+i,i_]; } for(j=i-1; j>=0; j--) { vr = a[i1+i,j1+j]; for(i_=j2; i_<=j2+n-1;i_++) { x[i2+j,i_] = x[i2+j,i_] - vr*x[i2+i,i_]; } } } return; } } } /************************************************************************* Level 2 subroutine: complex Hermitian rank-K update of one triangle of C. For OpTypeA=0 it computes C=alpha*A*A^H+beta*C, otherwise C=alpha*A^H*A+beta*C. Only the triangle selected by IsUpper is written (columns I..N-1 of row I when IsUpper, columns 0..I otherwise). Returns without touching C when (Alpha=0 or K=0) and Beta=1; ablasf.cmatrixherkf() is tried before the generic loops. When Beta=0 the old contents of C are not read. *************************************************************************/ private static void cmatrixherk2(int n, int k, double alpha, complex[,] a, int ia, int ja, int optypea, double beta, complex[,] c, int ic, int jc, bool isupper) { int i = 0; int j = 0; int j1 = 0; int j2 = 0; complex v = 0; int i_ = 0; int i1_ = 0; // // Fast exit (nothing to be done) // if( ((double)(alpha)==(double)(0) || k==0) && (double)(beta)==(double)(1) ) { return; } // // Try to call fast SYRK // if( ablasf.cmatrixherkf(n, k, alpha, a, ia, ja, optypea, beta, c, ic, jc, isupper) ) { return; } // // SYRK // if( optypea==0 ) { // // C=alpha*A*A^H+beta*C // for(i=0; i<=n-1; i++) { if( isupper ) { j1 = i; j2 = n-1; } else { j1 = 0; j2 = i; } for(j=j1; j<=j2; j++) { if( (double)(alpha)!=(double)(0) && k>0 ) { v = 0.0; for(i_=ja; i_<=ja+k-1;i_++) { v += a[ia+i,i_]*math.conj(a[ia+j,i_]); } } else { v = 0; } if( (double)(beta)==(double)(0) ) { c[ic+i,jc+j] = alpha*v; } else { c[ic+i,jc+j] = beta*c[ic+i,jc+j]+alpha*v; } } } return; } else { // // C=alpha*A^H*A+beta*C // for(i=0; i<=n-1; i++) { if( isupper ) { j1 = i; j2 = n-1; } else { j1 = 0; j2 = i; } if( (double)(beta)==(double)(0) ) { for(j=j1; j<=j2; j++) { c[ic+i,jc+j] = 0; } } else { for(i_=jc+j1; i_<=jc+j2;i_++) { c[ic+i,i_] = beta*c[ic+i,i_]; } } } for(i=0; i<=k-1; i++) { for(j=0; 
j<=n-1; j++) { if( isupper ) { j1 = j; j2 = n-1; } else { j1 = 0; j2 = j; } v = alpha*math.conj(a[ia+i,ja+j]); i1_ = (ja+j1) - (jc+j1); for(i_=jc+j1; i_<=jc+j2;i_++) { c[ic+j,i_] = c[ic+j,i_] + v*a[ia+i,i_+i1_]; } } } return; } } /************************************************************************* Level 2 subroutine: real symmetric rank-K update of one triangle of C. For OpTypeA=0 it computes C=alpha*A*A^T+beta*C, otherwise C=alpha*A^T*A+beta*C (the in-body comments write A^H, which for a real matrix is the same as A^T). Only the triangle selected by IsUpper is written (columns I..N-1 of row I when IsUpper, columns 0..I otherwise). Returns without touching C when (Alpha=0 or K=0) and Beta=1; ablasf.rmatrixsyrkf() is tried before the generic loops. When Beta=0 the old contents of C are not read. *************************************************************************/ private static void rmatrixsyrk2(int n, int k, double alpha, double[,] a, int ia, int ja, int optypea, double beta, double[,] c, int ic, int jc, bool isupper) { int i = 0; int j = 0; int j1 = 0; int j2 = 0; double v = 0; int i_ = 0; int i1_ = 0; // // Fast exit (nothing to be done) // if( ((double)(alpha)==(double)(0) || k==0) && (double)(beta)==(double)(1) ) { return; } // // Try to call fast SYRK // if( ablasf.rmatrixsyrkf(n, k, alpha, a, ia, ja, optypea, beta, c, ic, jc, isupper) ) { return; } // // SYRK // if( optypea==0 ) { // // C=alpha*A*A^H+beta*C // for(i=0; i<=n-1; i++) { if( isupper ) { j1 = i; j2 = n-1; } else { j1 = 0; j2 = i; } for(j=j1; j<=j2; j++) { if( (double)(alpha)!=(double)(0) && k>0 ) { v = 0.0; for(i_=ja; i_<=ja+k-1;i_++) { v += a[ia+i,i_]*a[ia+j,i_]; } } else { v = 0; } if( (double)(beta)==(double)(0) ) { c[ic+i,jc+j] = alpha*v; } else { c[ic+i,jc+j] = beta*c[ic+i,jc+j]+alpha*v; } } } return; } else { // // C=alpha*A^H*A+beta*C // for(i=0; i<=n-1; i++) { if( isupper ) { j1 = i; j2 = n-1; } else { j1 = 0; j2 = i; } if( (double)(beta)==(double)(0) ) { for(j=j1; j<=j2; j++) { c[ic+i,jc+j] = 0; } } else { for(i_=jc+j1; i_<=jc+j2;i_++) { c[ic+i,i_] = beta*c[ic+i,i_]; } } } for(i=0; i<=k-1; i++) { for(j=0; j<=n-1; j++) { if( isupper ) { j1 = j; j2 = n-1; } else { j1 = 0; j2 = j; } v = alpha*a[ia+i,ja+j]; i1_ = (ja+j1) - (jc+j1); for(i_=jc+j1; i_<=jc+j2;i_++) { c[ic+j,i_] = c[ic+j,i_] + v*a[ia+i,i_+i1_]; } } } return; } } } public class ortfac { /************************************************************************* QR decomposition of a rectangular 
matrix of size MxN COMMERCIAL EDITION OF ALGLIB: ! Commercial version of ALGLIB includes two important improvements of ! this function, which can be used from C++ and C#: ! * Intel MKL support (lightweight Intel MKL is shipped with ALGLIB) ! * multicore support ! ! Intel MKL gives approximately constant (with respect to number of ! worker threads) acceleration factor which depends on CPU being used, ! problem size and "baseline" ALGLIB edition which is used for ! comparison. ! ! Say, on SSE2-capable CPU with N=1024, HPC ALGLIB will be: ! * about 2-3x faster than ALGLIB for C++ without MKL ! * about 7-10x faster than "pure C#" edition of ALGLIB ! Difference in performance will be more striking on newer CPU's with ! support for newer SIMD instructions. Generally, MKL accelerates any ! problem whose size is at least 128, with best efficiency achieved for ! N's larger than 512. ! ! Commercial edition of ALGLIB also supports multithreaded acceleration ! of this function. We should note that QR decomposition is harder to ! parallelize than, say, matrix-matrix product - this algorithm has ! many internal synchronization points which can not be avoided. However ! parallelism starts to be profitable starting from N=512, achieving ! near-linear speedup for N=4096 or higher. ! ! In order to use multicore features you have to: ! * use commercial version of ALGLIB ! * call this function with "smp_" prefix, which indicates that ! multicore code will be used (for multicore support) ! ! We recommend you to read 'Working with commercial version' section of ! ALGLIB Reference Manual in order to find out how to use performance- ! related features provided by commercial edition of ALGLIB. Input parameters: A - matrix A whose indexes range within [0..M-1, 0..N-1]. M - number of rows in matrix A. N - number of columns in matrix A. Output parameters: A - matrices Q and R in compact form (see below). Tau - array of scalar factors which are used to form matrix Q. 
Array whose index ranges within [0.. Min(M-1,N-1)].

Matrix A is represented as A = QR, where Q is an orthogonal matrix of size
MxM, R - upper triangular (or upper trapezoid) matrix of size M x N.

The elements of matrix R are located on and above the main diagonal of
matrix A. The elements which are located in Tau array and below the main
diagonal of matrix A are used to form matrix Q as follows:

Matrix Q is represented as a product of elementary reflections

Q = H(0)*H(1)*...*H(k-1),

where k = min(m,n), and each H(i) is in the form

H(i) = 1 - tau * v * (v^T)

where tau is a scalar stored in Tau[I]; v - real vector,
so that v(0:i-1) = 0, v(i) = 1, v(i+1:m-1) stored in A(i+1:m-1,i).

Behaviour for degenerate sizes: when M<=0 or N<=0 the function returns
immediately, A is left untouched and Tau is set to an empty array.

  -- ALGLIB routine --
     17.02.2010
     Bochkanov Sergey
*************************************************************************/
public static void rmatrixqr(ref double[,] a, int m, int n, ref double[] tau)
{
    int blockstart = 0;
    int blocksize = 0;
    int rowscount = 0;
    int trailing = 0;
    int i = 0;
    int q = 0;

    tau = new double[0];
    if( m<=0 || n<=0 )
    {
        return;
    }

    int minmn = Math.Min(m, n);

    //
    // The block size is read exactly once. Every scratch matrix below is
    // dimensioned from it and every panel is capped by it, so all uses
    // must see the same value; keeping it in a local guarantees that and
    // avoids re-evaluating the call on each pass of the loop.
    //
    int nb = ablas.ablasblocksize(a);

    double[] work = new double[Math.Max(m, n)+1];
    double[] t = new double[Math.Max(m, n)+1];
    double[] taubuf = new double[minmn];
    double[,] tmpa = new double[m, nb];
    double[,] tmpt = new double[nb, 2*nb];
    double[,] tmpr = new double[2*nb, n];
    tau = new double[minmn];

    //
    // Blocked code: one panel of at most NB columns per iteration
    //
    blockstart = 0;
    while( blockstart!=minmn )
    {
        //
        // Determine block size
        //
        blocksize = Math.Min(minmn-blockstart, nb);
        rowscount = m-blockstart;

        //
        // QR decomposition of submatrix.
        // Matrix is copied to temporary storage to solve
        // some TLB issues arising from non-contiguous memory
        // access pattern.
        //
        ablas.rmatrixcopy(rowscount, blocksize, a, blockstart, blockstart, ref tmpa, 0, 0);
        rmatrixqrbasecase(ref tmpa, rowscount, blocksize, ref work, ref t, ref taubuf);
        ablas.rmatrixcopy(rowscount, blocksize, tmpa, 0, 0, ref a, blockstart, blockstart);
        for(i=0; i<=blocksize-1; i++)
        {
            tau[blockstart+i] = taubuf[i];
        }

        //
        // Update the rest, choose between:
        // a) Level 2 algorithm (when the rest of the matrix is small enough)
        // b) blocked algorithm, see algorithm 5 from  'A storage efficient WY
        //    representation for products of Householder transformations',
        //    by R. Schreiber and C. Van Loan.
        //
        if( blockstart+blocksize<=n-1 )
        {
            trailing = n-blockstart-blocksize;
            if( trailing>=2*nb || rowscount>=4*nb )
            {
                //
                // Prepare block reflector
                //
                rmatrixblockreflector(ref tmpa, ref taubuf, true, rowscount, blocksize, ref tmpt, ref work);

                //
                // Multiply the rest of A by Q'.
                //
                // Q  = E + Y*T*Y'  = E + TmpA*TmpT*TmpA'
                // Q' = E + Y*T'*Y' = E + TmpA*TmpT'*TmpA'
                //
                // Step 1: rows [0,blocksize) of TmpR          := TmpA' * Rest
                // Step 2: rows [blocksize,2*blocksize) of TmpR := TmpT' * (step 1)
                // Step 3: Rest                                 := Rest + TmpA * (step 2)
                //
                ablas.rmatrixgemm(blocksize, trailing, rowscount, 1.0, tmpa, 0, 0, 1, a, blockstart, blockstart+blocksize, 0, 0.0, tmpr, 0, 0);
                ablas.rmatrixgemm(blocksize, trailing, blocksize, 1.0, tmpt, 0, 0, 1, tmpr, 0, 0, 0, 0.0, tmpr, blocksize, 0);
                ablas.rmatrixgemm(rowscount, trailing, blocksize, 1.0, tmpa, 0, 0, 0, tmpr, blocksize, 0, 0, 1.0, a, blockstart, blockstart+blocksize);
            }
            else
            {
                //
                // Level 2 algorithm: apply the reflections of this panel
                // one by one. T[1..rowscount-i] receives column I of TmpA
                // from its diagonal element downwards, and the leading
                // entry is then replaced by the implicit 1.
                //
                for(i=0; i<=blocksize-1; i++)
                {
                    for(q=1; q<=rowscount-i; q++)
                    {
                        t[q] = tmpa[i+q-1,i];
                    }
                    t[1] = 1;
                    reflections.applyreflectionfromtheleft(ref a, taubuf[i], t, blockstart+i, m-1, blockstart+blocksize, n-1, ref work);
                }
            }
        }

        //
        // Advance
        //
        blockstart = blockstart+blocksize;
    }
}


/*************************************************************************
Single-threaded stub. HPC ALGLIB replaces it by multithreaded code.
*************************************************************************/ public static void _pexec_rmatrixqr(ref double[,] a, int m, int n, ref double[] tau) { /* plain pass-through: same four arguments, same order, A and Tau still by reference */ rmatrixqr(ref a,m,n,ref tau); } /************************************************************************* LQ decomposition of a rectangular matrix of size MxN COMMERCIAL EDITION OF ALGLIB: ! Commercial version of ALGLIB includes two important improvements of ! this function, which can be used from C++ and C#: ! * Intel MKL support (lightweight Intel MKL is shipped with ALGLIB) ! * multicore support ! ! Intel MKL gives approximately constant (with respect to number of ! worker threads) acceleration factor which depends on CPU being used, ! problem size and "baseline" ALGLIB edition which is used for ! comparison. ! ! Say, on SSE2-capable CPU with N=1024, HPC ALGLIB will be: ! * about 2-3x faster than ALGLIB for C++ without MKL ! * about 7-10x faster than "pure C#" edition of ALGLIB ! Difference in performance will be more striking on newer CPU's with ! support for newer SIMD instructions. Generally, MKL accelerates any ! problem whose size is at least 128, with best efficiency achieved for ! N's larger than 512. ! ! Commercial edition of ALGLIB also supports multithreaded acceleration ! of this function. We should note that LQ decomposition is harder to ! parallelize than, say, matrix-matrix product - this algorithm has ! many internal synchronization points which can not be avoided. However ! parallelism starts to be profitable starting from N=512, achieving ! near-linear speedup for N=4096 or higher. ! ! In order to use multicore features you have to: ! * use commercial version of ALGLIB ! * call this function with "smp_" prefix, which indicates that ! multicore code will be used (for multicore support) ! ! We recommend you to read 'Working with commercial version' section of ! ALGLIB Reference Manual in order to find out how to use performance- ! 
related features provided by commercial edition of ALGLIB. Input parameters: A - matrix A whose indexes range within [0..M-1, 0..N-1]. M - number of rows in matrix A. N - number of columns in matrix A. Output parameters: A - matrices L and Q in compact form (see below) Tau - array of scalar factors which are used to form matrix Q. Array whose index ranges within [0..Min(M,N)-1]. Matrix A is represented as A = LQ, where Q is an orthogonal matrix of size NxN, L - lower triangular (or lower trapezoid) matrix of size M x N. The elements of matrix L are located on and below the main diagonal of matrix A. The elements which are located in Tau array and above the main diagonal of matrix A are used to form matrix Q as follows: Matrix Q is represented as a product of elementary reflections Q = H(k-1)*H(k-2)*...*H(1)*H(0), where k = min(m,n), and each H(i) is of the form H(i) = 1 - tau * v * (v^T) where tau is a scalar stored in Tau[I]; v - real vector, so that v(0:i-1)=0, v(i) = 1, v(i+1:n-1) stored in A(i,i+1:n-1). 
-- ALGLIB routine --
     17.02.2010
     Bochkanov Sergey

Behaviour for degenerate sizes: when M<=0 or N<=0 the function returns
immediately, A is left untouched and Tau is set to an empty array.
*************************************************************************/
public static void rmatrixlq(ref double[,] a, int m, int n, ref double[] tau)
{
    int blockstart = 0;
    int blocksize = 0;
    int columnscount = 0;
    int trailing = 0;
    int i = 0;
    int q = 0;

    tau = new double[0];
    if( m<=0 || n<=0 )
    {
        return;
    }

    int minmn = Math.Min(m, n);

    //
    // The block size is read exactly once. Every scratch matrix below is
    // dimensioned from it and every panel is capped by it, so all uses
    // must see the same value; keeping it in a local guarantees that and
    // avoids re-evaluating the call on each pass of the loop.
    //
    int nb = ablas.ablasblocksize(a);

    double[] work = new double[Math.Max(m, n)+1];
    double[] t = new double[Math.Max(m, n)+1];
    double[] taubuf = new double[minmn];
    double[,] tmpa = new double[nb, n];
    double[,] tmpt = new double[nb, 2*nb];
    double[,] tmpr = new double[m, 2*nb];
    tau = new double[minmn];

    //
    // Blocked code: one panel of at most NB rows per iteration
    //
    blockstart = 0;
    while( blockstart!=minmn )
    {
        //
        // Determine block size
        //
        blocksize = Math.Min(minmn-blockstart, nb);
        columnscount = n-blockstart;

        //
        // LQ decomposition of submatrix.
        // Matrix is copied to temporary storage to solve
        // some TLB issues arising from non-contiguous memory
        // access pattern.
        //
        ablas.rmatrixcopy(blocksize, columnscount, a, blockstart, blockstart, ref tmpa, 0, 0);
        rmatrixlqbasecase(ref tmpa, blocksize, columnscount, ref work, ref t, ref taubuf);
        ablas.rmatrixcopy(blocksize, columnscount, tmpa, 0, 0, ref a, blockstart, blockstart);
        for(i=0; i<=blocksize-1; i++)
        {
            tau[blockstart+i] = taubuf[i];
        }

        //
        // Update the rest, choose between:
        // a) Level 2 algorithm (when the rest of the matrix is small enough)
        // b) blocked algorithm, see algorithm 5 from  'A storage efficient WY
        //    representation for products of Householder transformations',
        //    by R. Schreiber and C. Van Loan.
        //
        if( blockstart+blocksize<=m-1 )
        {
            trailing = m-blockstart-blocksize;
            if( trailing>=2*nb )
            {
                //
                // Prepare block reflector
                //
                rmatrixblockreflector(ref tmpa, ref taubuf, false, columnscount, blocksize, ref tmpt, ref work);

                //
                // Multiply the rest of A by Q.
                //
                // Q  = E + Y*T*Y'  = E + TmpA'*TmpT*TmpA
                //
                // Step 1: columns [0,blocksize) of TmpR           := Rest * TmpA'
                // Step 2: columns [blocksize,2*blocksize) of TmpR := (step 1) * TmpT
                // Step 3: Rest                                    := Rest + (step 2) * TmpA
                //
                ablas.rmatrixgemm(trailing, blocksize, columnscount, 1.0, a, blockstart+blocksize, blockstart, 0, tmpa, 0, 0, 1, 0.0, tmpr, 0, 0);
                ablas.rmatrixgemm(trailing, blocksize, blocksize, 1.0, tmpr, 0, 0, 0, tmpt, 0, 0, 0, 0.0, tmpr, 0, blocksize);
                ablas.rmatrixgemm(trailing, columnscount, blocksize, 1.0, tmpr, 0, blocksize, 0, tmpa, 0, 0, 0, 1.0, a, blockstart+blocksize, blockstart);
            }
            else
            {
                //
                // Level 2 algorithm: apply the reflections of this panel
                // one by one. T[1..columnscount-i] receives row I of TmpA
                // from its diagonal element rightwards, and the leading
                // entry is then replaced by the implicit 1.
                //
                for(i=0; i<=blocksize-1; i++)
                {
                    for(q=1; q<=columnscount-i; q++)
                    {
                        t[q] = tmpa[i,i+q-1];
                    }
                    t[1] = 1;
                    reflections.applyreflectionfromtheright(ref a, taubuf[i], t, blockstart+blocksize, m-1, blockstart+i, n-1, ref work);
                }
            }
        }

        //
        // Advance
        //
        blockstart = blockstart+blocksize;
    }
}


/*************************************************************************
Single-threaded stub. HPC ALGLIB replaces it by multithreaded code.
Here it is a plain pass-through to rmatrixlq().
*************************************************************************/
public static void _pexec_rmatrixlq(ref double[,] a, int m, int n, ref double[] tau)
{
    rmatrixlq(ref a, m, n, ref tau);
}


/*************************************************************************
QR decomposition of a rectangular complex matrix of size MxN

COMMERCIAL EDITION OF ALGLIB:

  ! Commercial version of ALGLIB includes two  important  improvements  of
  ! this function, which can be used from C++ and C#:
  ! * Intel MKL support (lightweight Intel MKL is shipped with ALGLIB)
  ! * multicore support
  !
  ! Intel MKL gives approximately constant  (with  respect  to  number  of
  ! worker threads) acceleration factor which depends on CPU  being  used,
  ! problem  size  and  "baseline"  ALGLIB  edition  which  is  used   for
  !
comparison. ! ! Say, on SSE2-capable CPU with N=1024, HPC ALGLIB will be: ! * about 2-3x faster than ALGLIB for C++ without MKL ! * about 7-10x faster than "pure C#" edition of ALGLIB ! Difference in performance will be more striking on newer CPU's with ! support for newer SIMD instructions. Generally, MKL accelerates any ! problem whose size is at least 128, with best efficiency achieved for ! N's larger than 512. ! ! Commercial edition of ALGLIB also supports multithreaded acceleration ! of this function. We should note that QR decomposition is harder to ! parallelize than, say, matrix-matrix product - this algorithm has ! many internal synchronization points which can not be avoided. However ! parallelism starts to be profitable starting from N=512, achieving ! near-linear speedup for N=4096 or higher. ! ! In order to use multicore features you have to: ! * use commercial version of ALGLIB ! * call this function with "smp_" prefix, which indicates that ! multicore code will be used (for multicore support) ! ! We recommend you to read 'Working with commercial version' section of ! ALGLIB Reference Manual in order to find out how to use performance- ! related features provided by commercial edition of ALGLIB. Input parameters: A - matrix A whose indexes range within [0..M-1, 0..N-1] M - number of rows in matrix A. N - number of columns in matrix A. Output parameters: A - matrices Q and R in compact form Tau - array of scalar factors which are used to form matrix Q. Array whose indexes range within [0.. Min(M,N)-1] Matrix A is represented as A = QR, where Q is an orthogonal matrix of size MxM, R - upper triangular (or upper trapezoid) matrix of size MxN. -- LAPACK routine (version 3.0) -- Univ. of Tennessee, Univ. 
of California Berkeley, NAG Ltd.,
     Courant Institute, Argonne National Lab, and Rice University
     September 30, 1994
*************************************************************************/
public static void cmatrixqr(ref complex[,] a, int m, int n, ref complex[] tau)
{
    // Tau is reset up front, so a non-positive M or N yields an empty Tau
    // and leaves A untouched.
    tau = new complex[0];
    if (m <= 0 || n <= 0)
    {
        return;
    }

    int reflectors = Math.Min(m, n);
    int scratchlen = Math.Max(m, n) + 1;
    complex[] work = new complex[scratchlen];
    complex[] t = new complex[scratchlen];
    tau = new complex[reflectors];
    complex[] taubuf = new complex[reflectors];
    complex[,] tmpa = new complex[m, ablas.ablascomplexblocksize(a)];
    complex[,] tmpt = new complex[ablas.ablascomplexblocksize(a), ablas.ablascomplexblocksize(a)];
    complex[,] tmpr = new complex[2 * ablas.ablascomplexblocksize(a), n];

    //
    // Blocked code: every pass handles one panel of columns whose top-left
    // corner is the diagonal element (blockstart, blockstart).
    //
    int blockstart = 0;
    while (blockstart != reflectors)
    {
        //
        // Determine block size (capped by the complex ABLAS block size)
        //
        int blocksize = reflectors - blockstart;
        if (blocksize > ablas.ablascomplexblocksize(a))
        {
            blocksize = ablas.ablascomplexblocksize(a);
        }
        int rowscount = m - blockstart;
        int colsright = n - blockstart - blocksize;

        //
        // QR decomposition of submatrix.
        // Matrix is copied to temporary storage to solve
        // some TLB issues arising from non-contiguous memory
        // access pattern.
        //
        ablas.cmatrixcopy(rowscount, blocksize, a, blockstart, blockstart, ref tmpa, 0, 0);
        cmatrixqrbasecase(ref tmpa, rowscount, blocksize, ref work, ref t, ref taubuf);
        ablas.cmatrixcopy(rowscount, blocksize, tmpa, 0, 0, ref a, blockstart, blockstart);
        for (int k = 0; k < blocksize; k++)
        {
            tau[blockstart + k] = taubuf[k];
        }

        //
        // Update the rest, choose between:
        // a) Level 2 algorithm (when the rest of the matrix is small enough)
        // b) blocked algorithm, see algorithm 5 from  'A storage efficient WY
        //    representation for products of Householder transformations',
        //    by R. Schreiber and C. Van Loan.
        //
        if (colsright >= 1)
        {
            if (colsright >= 2 * ablas.ablascomplexblocksize(a))
            {
                //
                // Prepare block reflector
                //
                cmatrixblockreflector(ref tmpa, ref taubuf, true, rowscount, blocksize, ref tmpt, ref work);

                //
                // Multiply the rest of A by Q'.
                //
                // Q  = E + Y*T*Y'  = E + TmpA*TmpT*TmpA'
                // Q' = E + Y*T'*Y' = E + TmpA*TmpT'*TmpA'
                //
                ablas.cmatrixgemm(blocksize, colsright, rowscount, 1.0, tmpa, 0, 0, 2, a, blockstart, blockstart + blocksize, 0, 0.0, tmpr, 0, 0);
                ablas.cmatrixgemm(blocksize, colsright, blocksize, 1.0, tmpt, 0, 0, 2, tmpr, 0, 0, 0, 0.0, tmpr, blocksize, 0);
                ablas.cmatrixgemm(rowscount, colsright, blocksize, 1.0, tmpa, 0, 0, 0, tmpr, blocksize, 0, 0, 1.0, a, blockstart, blockstart + blocksize);
            }
            else
            {
                //
                // Level 2 algorithm: apply the reflectors one by one.
                // T[1..] receives column `col` of TmpA starting at its
                // diagonal element; the leading component is then forced
                // to 1. The conjugated Tau is passed to the reflection.
                //
                for (int col = 0; col < blocksize; col++)
                {
                    for (int k = 1; k <= rowscount - col; k++)
                    {
                        t[k] = tmpa[col + k - 1, col];
                    }
                    t[1] = 1;
                    creflections.complexapplyreflectionfromtheleft(ref a, math.conj(taubuf[col]), t, blockstart + col, m - 1, blockstart + blocksize, n - 1, ref work);
                }
            }
        }

        //
        // Advance
        //
        blockstart += blocksize;
    }
}


/*************************************************************************
Single-threaded stub. HPC ALGLIB replaces it by multithreaded code.
*************************************************************************/
public static void _pexec_cmatrixqr(ref complex[,] a, int m, int n, ref complex[] tau)
{
    // Forwards all arguments unchanged to the serial cmatrixqr().
    cmatrixqr(ref a,m,n,ref tau);
}


/*************************************************************************
LQ decomposition of a rectangular complex matrix of size MxN

COMMERCIAL EDITION OF ALGLIB:

  ! Commercial version of ALGLIB includes two important improvements of
  ! this function, which can be used from C++ and C#:
  ! * Intel MKL support (lightweight Intel MKL is shipped with ALGLIB)
  ! * multicore support
  !
  ! Intel MKL gives approximately constant (with respect to number of
  ! worker threads) acceleration factor which depends on CPU being used,
  ! problem size and "baseline" ALGLIB edition which is used for
  ! comparison.
  !
  ! Say, on SSE2-capable CPU with N=1024, HPC ALGLIB will be:
  ! * about 2-3x faster than ALGLIB for C++ without MKL
  ! * about 7-10x faster than "pure C#" edition of ALGLIB
  ! Difference in performance will be more striking on newer CPU's with
  ! support for newer SIMD instructions. Generally, MKL accelerates any
  ! problem whose size is at least 128, with best efficiency achieved for
  ! N's larger than 512.
  !
  ! Commercial edition of ALGLIB also supports multithreaded acceleration
  ! of this function. We should note that LQ decomposition is harder to
  ! parallelize than, say, matrix-matrix product - this algorithm has
  ! many internal synchronization points which can not be avoided. However
  ! parallelism starts to be profitable starting from N=512, achieving
  ! near-linear speedup for N=4096 or higher.
  !
  ! In order to use multicore features you have to:
  ! * use commercial version of ALGLIB
  ! * call this function with "smp_" prefix, which indicates that
  !   multicore code will be used (for multicore support)
  !
  ! We recommend you to read 'Working with commercial version' section of
  ! ALGLIB Reference Manual in order to find out how to use performance- !
related features provided by commercial edition of ALGLIB.

Input parameters:
    A   -   matrix A whose indexes range within [0..M-1, 0..N-1]
    M   -   number of rows in matrix A.
    N   -   number of columns in matrix A.

Output parameters:
    A   -   matrices Q and L in compact form
    Tau -   array of scalar factors which are used to form matrix Q. Array
            whose indexes range within [0.. Min(M,N)-1]

Matrix A is represented as A = LQ, where Q is an orthogonal (unitary) matrix
of size NxN, L - lower triangular (or lower trapezoid) matrix of size MxN.

  -- LAPACK routine (version 3.0) --
     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
     Courant Institute, Argonne National Lab, and Rice University
     September 30, 1994
*************************************************************************/
public static void cmatrixlq(ref complex[,] a, int m, int n, ref complex[] tau)
{
    // Tau is reset up front, so a non-positive M or N yields an empty Tau
    // and leaves A untouched.
    tau = new complex[0];
    if (m <= 0 || n <= 0)
    {
        return;
    }

    int reflectors = Math.Min(m, n);
    int scratchlen = Math.Max(m, n) + 1;
    complex[] work = new complex[scratchlen];
    complex[] t = new complex[scratchlen];
    tau = new complex[reflectors];
    complex[] taubuf = new complex[reflectors];
    complex[,] tmpa = new complex[ablas.ablascomplexblocksize(a), n];
    complex[,] tmpt = new complex[ablas.ablascomplexblocksize(a), ablas.ablascomplexblocksize(a)];
    complex[,] tmpr = new complex[m, 2 * ablas.ablascomplexblocksize(a)];

    //
    // Blocked code: every pass handles one strip of rows whose top-left
    // corner is the diagonal element (blockstart, blockstart).
    //
    int blockstart = 0;
    while (blockstart != reflectors)
    {
        //
        // Determine block size (capped by the complex ABLAS block size)
        //
        int blocksize = reflectors - blockstart;
        if (blocksize > ablas.ablascomplexblocksize(a))
        {
            blocksize = ablas.ablascomplexblocksize(a);
        }
        int columnscount = n - blockstart;
        int rowsbelow = m - blockstart - blocksize;

        //
        // LQ decomposition of submatrix.
        // Matrix is copied to temporary storage to solve
        // some TLB issues arising from non-contiguous memory
        // access pattern.
        //
        ablas.cmatrixcopy(blocksize, columnscount, a, blockstart, blockstart, ref tmpa, 0, 0);
        cmatrixlqbasecase(ref tmpa, blocksize, columnscount, ref work, ref t, ref taubuf);
        ablas.cmatrixcopy(blocksize, columnscount, tmpa, 0, 0, ref a, blockstart, blockstart);
        for (int k = 0; k < blocksize; k++)
        {
            tau[blockstart + k] = taubuf[k];
        }

        //
        // Update the rest, choose between:
        // a) Level 2 algorithm (when the rest of the matrix is small enough)
        // b) blocked algorithm, see algorithm 5 from  'A storage efficient WY
        //    representation for products of Householder transformations',
        //    by R. Schreiber and C. Van Loan.
        //
        if (rowsbelow >= 1)
        {
            if (rowsbelow >= 2 * ablas.ablascomplexblocksize(a))
            {
                //
                // Prepare block reflector
                //
                cmatrixblockreflector(ref tmpa, ref taubuf, false, columnscount, blocksize, ref tmpt, ref work);

                //
                // Multiply the rest of A by Q.
                //
                // Q  = E + Y*T*Y'  = E + TmpA'*TmpT*TmpA
                //
                ablas.cmatrixgemm(rowsbelow, blocksize, columnscount, 1.0, a, blockstart + blocksize, blockstart, 0, tmpa, 0, 0, 2, 0.0, tmpr, 0, 0);
                ablas.cmatrixgemm(rowsbelow, blocksize, blocksize, 1.0, tmpr, 0, 0, 0, tmpt, 0, 0, 0, 0.0, tmpr, 0, blocksize);
                ablas.cmatrixgemm(rowsbelow, columnscount, blocksize, 1.0, tmpr, 0, blocksize, 0, tmpa, 0, 0, 0, 1.0, a, blockstart + blocksize, blockstart);
            }
            else
            {
                //
                // Level 2 algorithm: apply the reflectors one by one.
                // T[1..] receives the conjugate of row `row` of TmpA starting
                // at its diagonal element; the leading component is then
                // forced to 1.
                //
                for (int row = 0; row < blocksize; row++)
                {
                    for (int k = 1; k <= columnscount - row; k++)
                    {
                        t[k] = math.conj(tmpa[row, row + k - 1]);
                    }
                    t[1] = 1;
                    creflections.complexapplyreflectionfromtheright(ref a, taubuf[row], ref t, blockstart + blocksize, m - 1, blockstart + row, n - 1, ref work);
                }
            }
        }

        //
        // Advance
        //
        blockstart += blocksize;
    }
}


/*************************************************************************
Single-threaded stub. HPC ALGLIB replaces it by multithreaded code.
*************************************************************************/
public static void _pexec_cmatrixlq(ref complex[,] a, int m, int n, ref complex[] tau)
{
    // Forwards all arguments unchanged to the serial cmatrixlq().
    cmatrixlq(ref a,m,n,ref tau);
}


/*************************************************************************
Partial unpacking of matrix Q from the QR decomposition of a matrix A

COMMERCIAL EDITION OF ALGLIB:

  ! Commercial version of ALGLIB includes two important improvements of
  ! this function, which can be used from C++ and C#:
  ! * Intel MKL support (lightweight Intel MKL is shipped with ALGLIB)
  ! * multicore support
  !
  ! Intel MKL gives approximately constant (with respect to number of
  ! worker threads) acceleration factor which depends on CPU being used,
  ! problem size and "baseline" ALGLIB edition which is used for
  ! comparison.
  !
  ! Say, on SSE2-capable CPU with N=1024, HPC ALGLIB will be:
  ! * about 2-3x faster than ALGLIB for C++ without MKL
  ! * about 7-10x faster than "pure C#" edition of ALGLIB
  ! Difference in performance will be more striking on newer CPU's with
  ! support for newer SIMD instructions. Generally, MKL accelerates any
  ! problem whose size is at least 128, with best efficiency achieved for
  ! N's larger than 512.
  !
  ! Commercial edition of ALGLIB also supports multithreaded acceleration
  ! of this function. We should note that QR decomposition is harder to
  ! parallelize than, say, matrix-matrix product - this algorithm has
  ! many internal synchronization points which can not be avoided. However
  ! parallelism starts to be profitable starting from N=512, achieving
  ! near-linear speedup for N=4096 or higher.
  !
  ! In order to use multicore features you have to:
  ! * use commercial version of ALGLIB
  ! * call this function with "smp_" prefix, which indicates that
  !   multicore code will be used (for multicore support)
  !
  ! We recommend you to read 'Working with commercial version' section of
  ! ALGLIB Reference Manual in order to find out how to use performance- !
related features provided by commercial edition of ALGLIB.

Input parameters:
    A       -   matrices Q and R in compact form.
                Output of RMatrixQR subroutine.
    M       -   number of rows in given matrix A. M>=0.
    N       -   number of columns in given matrix A. N>=0.
    Tau     -   scalar factors which are used to form Q.
                Output of the RMatrixQR subroutine.
    QColumns -  required number of columns of matrix Q. M>=QColumns>=0.

Output parameters:
    Q       -   first QColumns columns of matrix Q.
                Array whose indexes range within [0..M-1, 0..QColumns-1].
                If QColumns=0 (or M<=0 or N<=0), Q is returned as an
                empty 0x0 array.

  -- ALGLIB routine --
     17.02.2010
     Bochkanov Sergey
*************************************************************************/
public static void rmatrixqrunpackq(double[,] a, int m, int n, double[] tau, int qcolumns, ref double[,] q)
{
    // Q is reset before any validation, so every early exit returns 0x0.
    q = new double[0,0];
    alglib.ap.assert(qcolumns<=m, "UnpackQFromQR: QColumns>M!");
    if (m <= 0 || n <= 0 || qcolumns <= 0)
    {
        return;
    }

    //
    // init: Q starts as the leading M x QColumns part of the identity
    //
    int minmn = Math.Min(m, n);
    int refcnt = Math.Min(minmn, qcolumns);
    q = new double[m, qcolumns];
    for (int row = 0; row < m; row++)
    {
        for (int col = 0; col < qcolumns; col++)
        {
            q[row, col] = row == col ? 1 : 0;
        }
    }
    int scratchlen = Math.Max(m, qcolumns) + 1;
    double[] work = new double[scratchlen];
    double[] t = new double[scratchlen];
    double[] taubuf = new double[minmn];
    double[,] tmpa = new double[m, ablas.ablasblocksize(a)];
    double[,] tmpt = new double[ablas.ablasblocksize(a), 2 * ablas.ablasblocksize(a)];
    double[,] tmpr = new double[2 * ablas.ablasblocksize(a), qcolumns];

    //
    // Blocked code: blocks are visited from the last one back to the first.
    // The last block holds the remainder of RefCnt modulo the block size
    // (possibly zero, in which case that pass does nothing).
    //
    int blockstart = ablas.ablasblocksize(a) * (refcnt / ablas.ablasblocksize(a));
    int blocksize = refcnt - blockstart;
    while (blockstart >= 0)
    {
        int rowscount = m - blockstart;
        if (blocksize > 0)
        {
            //
            // Copy current block
            //
            ablas.rmatrixcopy(rowscount, blocksize, a, blockstart, blockstart, ref tmpa, 0, 0);
            for (int k = 0; k < blocksize; k++)
            {
                taubuf[k] = tau[blockstart + k];
            }

            //
            // Update, choose between:
            // a) Level 2 algorithm (when the rest of the matrix is small enough)
            // b) blocked algorithm, see algorithm 5 from  'A storage efficient WY
            //    representation for products of Householder transformations',
            //    by R. Schreiber and C. Van Loan.
            //
            if (qcolumns >= 2 * ablas.ablasblocksize(a))
            {
                //
                // Prepare block reflector
                //
                rmatrixblockreflector(ref tmpa, ref taubuf, true, rowscount, blocksize, ref tmpt, ref work);

                //
                // Multiply matrix by Q.
                //
                // Q  = E + Y*T*Y'  = E + TmpA*TmpT*TmpA'
                //
                ablas.rmatrixgemm(blocksize, qcolumns, rowscount, 1.0, tmpa, 0, 0, 1, q, blockstart, 0, 0, 0.0, tmpr, 0, 0);
                ablas.rmatrixgemm(blocksize, qcolumns, blocksize, 1.0, tmpt, 0, 0, 0, tmpr, 0, 0, 0, 0.0, tmpr, blocksize, 0);
                ablas.rmatrixgemm(rowscount, qcolumns, blocksize, 1.0, tmpa, 0, 0, 0, tmpr, blocksize, 0, 0, 1.0, q, blockstart, 0);
            }
            else
            {
                //
                // Level 2 algorithm: reflectors of the block are applied in
                // reverse order. T[1..] receives column `col` of TmpA starting
                // at its diagonal element; the leading component is set to 1.
                //
                for (int col = blocksize - 1; col >= 0; col--)
                {
                    for (int k = 1; k <= rowscount - col; k++)
                    {
                        t[k] = tmpa[col + k - 1, col];
                    }
                    t[1] = 1;
                    reflections.applyreflectionfromtheleft(ref q, taubuf[col], t, blockstart + col, m - 1, 0, qcolumns - 1, ref work);
                }
            }
        }

        //
        // Advance: step back one full block
        //
        blockstart -= ablas.ablasblocksize(a);
        blocksize = ablas.ablasblocksize(a);
    }
}


/*************************************************************************
Single-threaded stub. HPC ALGLIB replaces it by multithreaded code.
*************************************************************************/
public static void _pexec_rmatrixqrunpackq(double[,] a, int m, int n, double[] tau, int qcolumns, ref double[,] q)
{
    rmatrixqrunpackq(a, m, n, tau, qcolumns, ref q);
}


/*************************************************************************
Unpacking of matrix R from the QR decomposition of a matrix A

Input parameters:
    A       -   matrices Q and R in compact form.
                Output of RMatrixQR subroutine.
M - number of rows in given matrix A. M>=0. N - number of columns in given matrix A. N>=0. Output parameters: R - matrix R, array[0..M-1, 0..N-1]. -- ALGLIB routine -- 17.02.2010 Bochkanov Sergey *************************************************************************/ public static void rmatrixqrunpackr(double[,] a, int m, int n, ref double[,] r) { int i = 0; int k = 0; int i_ = 0; r = new double[0,0]; if( m<=0 || n<=0 ) { return; } k = Math.Min(m, n); r = new double[m, n]; for(i=0; i<=n-1; i++) { r[0,i] = 0; } for(i=1; i<=m-1; i++) { for(i_=0; i_<=n-1;i_++) { r[i,i_] = r[0,i_]; } } for(i=0; i<=k-1; i++) { for(i_=i; i_<=n-1;i_++) { r[i,i_] = a[i,i_]; } } } /************************************************************************* Partial unpacking of matrix Q from the LQ decomposition of a matrix A COMMERCIAL EDITION OF ALGLIB: ! Commercial version of ALGLIB includes two important improvements of ! this function, which can be used from C++ and C#: ! * Intel MKL support (lightweight Intel MKL is shipped with ALGLIB) ! * multicore support ! ! Intel MKL gives approximately constant (with respect to number of ! worker threads) acceleration factor which depends on CPU being used, ! problem size and "baseline" ALGLIB edition which is used for ! comparison. ! ! Say, on SSE2-capable CPU with N=1024, HPC ALGLIB will be: ! * about 2-3x faster than ALGLIB for C++ without MKL ! * about 7-10x faster than "pure C#" edition of ALGLIB ! Difference in performance will be more striking on newer CPU's with ! support for newer SIMD instructions. Generally, MKL accelerates any ! problem whose size is at least 128, with best efficiency achieved for ! N's larger than 512. ! ! Commercial edition of ALGLIB also supports multithreaded acceleration ! of this function. We should note that QP decomposition is harder to ! parallelize than, say, matrix-matrix product - this algorithm has ! many internal synchronization points which can not be avoided. However ! 
parallelism starts to be profitable starting from N=512, achieving
  ! near-linear speedup for N=4096 or higher.
  !
  ! In order to use multicore features you have to:
  ! * use commercial version of ALGLIB
  ! * call this function with "smp_" prefix, which indicates that
  !   multicore code will be used (for multicore support)
  !
  ! We recommend you to read 'Working with commercial version' section of
  ! ALGLIB Reference Manual in order to find out how to use performance-
  ! related features provided by commercial edition of ALGLIB.

Input parameters:
    A       -   matrices L and Q in compact form.
                Output of RMatrixLQ subroutine.
    M       -   number of rows in given matrix A. M>=0.
    N       -   number of columns in given matrix A. N>=0.
    Tau     -   scalar factors which are used to form Q.
                Output of the RMatrixLQ subroutine.
    QRows   -   required number of rows in matrix Q. N>=QRows>=0.

Output parameters:
    Q       -   first QRows rows of matrix Q. Array whose indexes range
                within [0..QRows-1, 0..N-1]. If QRows=0 (or M<=0 or N<=0),
                Q is returned as an empty 0x0 array.

  -- ALGLIB routine --
     17.02.2010
     Bochkanov Sergey
*************************************************************************/
public static void rmatrixlqunpackq(double[,] a, int m, int n, double[] tau, int qrows, ref double[,] q)
{
    // Q is reset before any validation, so every early exit returns 0x0.
    q = new double[0,0];
    alglib.ap.assert(qrows<=n, "RMatrixLQUnpackQ: QRows>N!");
    if (m <= 0 || n <= 0 || qrows <= 0)
    {
        return;
    }

    //
    // init: Q starts as the leading QRows x N part of the identity
    //
    int minmn = Math.Min(m, n);
    int refcnt = Math.Min(minmn, qrows);
    int scratchlen = Math.Max(m, n) + 1;
    double[] work = new double[scratchlen];
    double[] t = new double[scratchlen];
    double[] taubuf = new double[minmn];
    double[,] tmpa = new double[ablas.ablasblocksize(a), n];
    double[,] tmpt = new double[ablas.ablasblocksize(a), 2 * ablas.ablasblocksize(a)];
    double[,] tmpr = new double[qrows, 2 * ablas.ablasblocksize(a)];
    q = new double[qrows, n];
    for (int row = 0; row < qrows; row++)
    {
        for (int col = 0; col < n; col++)
        {
            q[row, col] = row == col ? 1 : 0;
        }
    }

    //
    // Blocked code: blocks are visited from the last one back to the first.
    // The last block holds the remainder of RefCnt modulo the block size
    // (possibly zero, in which case that pass does nothing).
    //
    int blockstart = ablas.ablasblocksize(a) * (refcnt / ablas.ablasblocksize(a));
    int blocksize = refcnt - blockstart;
    while (blockstart >= 0)
    {
        int columnscount = n - blockstart;
        if (blocksize > 0)
        {
            //
            // Copy submatrix
            //
            ablas.rmatrixcopy(blocksize, columnscount, a, blockstart, blockstart, ref tmpa, 0, 0);
            for (int k = 0; k < blocksize; k++)
            {
                taubuf[k] = tau[blockstart + k];
            }

            //
            // Update matrix, choose between:
            // a) Level 2 algorithm (when the rest of the matrix is small enough)
            // b) blocked algorithm, see algorithm 5 from  'A storage efficient WY
            //    representation for products of Householder transformations',
            //    by R. Schreiber and C. Van Loan.
            //
            if (qrows >= 2 * ablas.ablasblocksize(a))
            {
                //
                // Prepare block reflector
                //
                rmatrixblockreflector(ref tmpa, ref taubuf, false, columnscount, blocksize, ref tmpt, ref work);

                //
                // Multiply the rest of A by Q'.
                //
                // Q'  = E + Y*T'*Y'  = E + TmpA'*TmpT'*TmpA
                //
                ablas.rmatrixgemm(qrows, blocksize, columnscount, 1.0, q, 0, blockstart, 0, tmpa, 0, 0, 1, 0.0, tmpr, 0, 0);
                ablas.rmatrixgemm(qrows, blocksize, blocksize, 1.0, tmpr, 0, 0, 0, tmpt, 0, 0, 1, 0.0, tmpr, 0, blocksize);
                ablas.rmatrixgemm(qrows, columnscount, blocksize, 1.0, tmpr, 0, blocksize, 0, tmpa, 0, 0, 0, 1.0, q, 0, blockstart);
            }
            else
            {
                //
                // Level 2 algorithm: reflectors of the block are applied in
                // reverse order. T[1..] receives row `row` of TmpA starting at
                // its diagonal element; the leading component is set to 1.
                //
                for (int row = blocksize - 1; row >= 0; row--)
                {
                    for (int k = 1; k <= columnscount - row; k++)
                    {
                        t[k] = tmpa[row, row + k - 1];
                    }
                    t[1] = 1;
                    reflections.applyreflectionfromtheright(ref q, taubuf[row], t, 0, qrows - 1, blockstart + row, n - 1, ref work);
                }
            }
        }

        //
        // Advance: step back one full block
        //
        blockstart -= ablas.ablasblocksize(a);
        blocksize = ablas.ablasblocksize(a);
    }
}


/*************************************************************************
Single-threaded stub. HPC ALGLIB replaces it by multithreaded code.
*************************************************************************/
public static void _pexec_rmatrixlqunpackq(double[,] a, int m, int n, double[] tau, int qrows, ref double[,] q)
{
    rmatrixlqunpackq(a, m, n, tau, qrows, ref q);
}


/*************************************************************************
Unpacking of matrix L from the LQ decomposition of a matrix A

Input parameters:
    A       -   matrices Q and L in compact form.
                Output of RMatrixLQ subroutine.
    M       -   number of rows in given matrix A. M>=0.
    N       -   number of columns in given matrix A. N>=0.

Output parameters:
    L       -   matrix L, array[0..M-1, 0..N-1].
                Empty 0x0 array when M<=0 or N<=0.

  -- ALGLIB routine --
     17.02.2010
     Bochkanov Sergey
*************************************************************************/
public static void rmatrixlqunpackl(double[,] a, int m, int n, ref double[,] l)
{
    l = new double[0,0];
    if (m <= 0 || n <= 0)
    {
        return;
    }

    // A freshly allocated double array is already zero-filled, so only the
    // lower trapezoid (column <= row, clipped to the last column) is copied.
    l = new double[m, n];
    for (int row = 0; row < m; row++)
    {
        int lastcol = Math.Min(row, n - 1);
        for (int col = 0; col <= lastcol; col++)
        {
            l[row, col] = a[row, col];
        }
    }
}


/*************************************************************************
Partial unpacking of matrix Q from QR decomposition of a complex matrix A.

COMMERCIAL EDITION OF ALGLIB:

  ! Commercial version of ALGLIB includes two important improvements of
  ! this function, which can be used from C++ and C#:
  ! * Intel MKL support (lightweight Intel MKL is shipped with ALGLIB)
  ! * multicore support
  !
  ! Intel MKL gives approximately constant (with respect to number of
  ! worker threads) acceleration factor which depends on CPU being used,
  ! problem size and "baseline" ALGLIB edition which is used for
  ! comparison.
  !
  ! Say, on SSE2-capable CPU with N=1024, HPC ALGLIB will be:
  ! * about 2-3x faster than ALGLIB for C++ without MKL
  ! * about 7-10x faster than "pure C#" edition of ALGLIB
  ! Difference in performance will be more striking on newer CPU's with !
support for newer SIMD instructions. Generally, MKL accelerates any ! problem whose size is at least 128, with best efficiency achieved for ! N's larger than 512. ! ! Commercial edition of ALGLIB also supports multithreaded acceleration ! of this function. We should note that QR decomposition is harder to ! parallelize than, say, matrix-matrix product - this algorithm has ! many internal synchronization points which can not be avoided. However ! parallelism starts to be profitable starting from N=512, achieving ! near-linear speedup for N=4096 or higher. ! ! In order to use multicore features you have to: ! * use commercial version of ALGLIB ! * call this function with "smp_" prefix, which indicates that ! multicore code will be used (for multicore support) ! ! We recommend you to read 'Working with commercial version' section of ! ALGLIB Reference Manual in order to find out how to use performance- ! related features provided by commercial edition of ALGLIB. Input parameters: A - matrices Q and R in compact form. Output of CMatrixQR subroutine. M - number of rows in matrix A. M>=0. N - number of columns in matrix A. N>=0. Tau - scalar factors which are used to form Q. Output of CMatrixQR subroutine. QColumns - required number of columns in matrix Q. M>=QColumns>=0. Output parameters: Q - first QColumns columns of matrix Q. Array whose index ranges within [0..M-1, 0..QColumns-1]. If QColumns=0, Q is returned as an empty array. 
-- ALGLIB routine --
     17.02.2010
     Bochkanov Sergey
*************************************************************************/
public static void cmatrixqrunpackq(complex[,] a, int m, int n, complex[] tau, int qcolumns, ref complex[,] q)
{
    complex[] work = new complex[0];
    complex[] t = new complex[0];
    complex[] taubuf = new complex[0];
    int minmn = 0;
    int refcnt = 0;
    complex[,] tmpa = new complex[0,0];
    complex[,] tmpt = new complex[0,0];
    complex[,] tmpr = new complex[0,0];
    int blockstart = 0;
    int blocksize = 0;
    int rowscount = 0;
    int i = 0;
    int j = 0;
    int i_ = 0;
    int i1_ = 0;

    // Q is reset before any validation, so every early exit returns 0x0.
    q = new complex[0,0];

    alglib.ap.assert(qcolumns<=m, "UnpackQFromQR: QColumns>M!");

    //
    // Nothing to unpack. QColumns<=0 is rejected here as well, exactly as
    // rmatrixqrunpackq() does: without this check a negative QColumns would
    // reach the array allocations below, and QColumns=0 would run the whole
    // blocked loop for no result.
    //
    if( (m<=0 || n<=0) || qcolumns<=0 )
    {
        return;
    }

    //
    // init: Q starts as the leading M x QColumns part of the identity
    //
    minmn = Math.Min(m, n);
    refcnt = Math.Min(minmn, qcolumns);
    work = new complex[Math.Max(m, n)+1];
    t = new complex[Math.Max(m, n)+1];
    taubuf = new complex[minmn];
    tmpa = new complex[m, ablas.ablascomplexblocksize(a)];
    tmpt = new complex[ablas.ablascomplexblocksize(a), ablas.ablascomplexblocksize(a)];
    tmpr = new complex[2*ablas.ablascomplexblocksize(a), qcolumns];
    q = new complex[m, qcolumns];
    for(i=0; i<=m-1; i++)
    {
        for(j=0; j<=qcolumns-1; j++)
        {
            if( i==j )
            {
                q[i,j] = 1;
            }
            else
            {
                q[i,j] = 0;
            }
        }
    }

    //
    // Blocked code: blocks are visited from the last one back to the first.
    // The last block holds the remainder of RefCnt modulo the block size
    // (possibly zero, in which case that pass does nothing).
    //
    blockstart = ablas.ablascomplexblocksize(a)*(refcnt/ablas.ablascomplexblocksize(a));
    blocksize = refcnt-blockstart;
    while( blockstart>=0 )
    {
        rowscount = m-blockstart;
        if( blocksize>0 )
        {
            //
            // Copy current block and its Tau values to temporary storage
            //
            ablas.cmatrixcopy(rowscount, blocksize, a, blockstart, blockstart, ref tmpa, 0, 0);
            i1_ = (blockstart) - (0);
            for(i_=0; i_<=blocksize-1;i_++)
            {
                taubuf[i_] = tau[i_+i1_];
            }

            //
            // Update matrix, choose between:
            // a) Level 2 algorithm (when the rest of the matrix is small enough)
            // b) blocked algorithm, see algorithm 5 from  'A storage efficient WY
            //    representation for products of Householder transformations',
            //    by R. Schreiber and C. Van Loan.
            //
            if( qcolumns>=2*ablas.ablascomplexblocksize(a) )
            {
                //
                // Prepare block reflector
                //
                cmatrixblockreflector(ref tmpa, ref taubuf, true, rowscount, blocksize, ref tmpt, ref work);

                //
                // Multiply the rest of A by Q.
                //
                // Q  = E + Y*T*Y'  = E + TmpA*TmpT*TmpA'
                //
                ablas.cmatrixgemm(blocksize, qcolumns, rowscount, 1.0, tmpa, 0, 0, 2, q, blockstart, 0, 0, 0.0, tmpr, 0, 0);
                ablas.cmatrixgemm(blocksize, qcolumns, blocksize, 1.0, tmpt, 0, 0, 0, tmpr, 0, 0, 0, 0.0, tmpr, blocksize, 0);
                ablas.cmatrixgemm(rowscount, qcolumns, blocksize, 1.0, tmpa, 0, 0, 0, tmpr, blocksize, 0, 0, 1.0, q, blockstart, 0);
            }
            else
            {
                //
                // Level 2 algorithm: reflectors of the block are applied in
                // reverse order. T[1..] receives column I of TmpA starting at
                // its diagonal element; the leading component is set to 1.
                //
                for(i=blocksize-1; i>=0; i--)
                {
                    i1_ = (i) - (1);
                    for(i_=1; i_<=rowscount-i;i_++)
                    {
                        t[i_] = tmpa[i_+i1_,i];
                    }
                    t[1] = 1;
                    creflections.complexapplyreflectionfromtheleft(ref q, taubuf[i], t, blockstart+i, m-1, 0, qcolumns-1, ref work);
                }
            }
        }

        //
        // Advance: step back one full block
        //
        blockstart = blockstart-ablas.ablascomplexblocksize(a);
        blocksize = ablas.ablascomplexblocksize(a);
    }
}


/*************************************************************************
Single-threaded stub. HPC ALGLIB replaces it by multithreaded code.
*************************************************************************/
public static void _pexec_cmatrixqrunpackq(complex[,] a,
    int m,
    int n,
    complex[] tau,
    int qcolumns,
    ref complex[,] q)
{
    // Plain forwarder to the serial implementation.
    cmatrixqrunpackq(a, m, n, tau, qcolumns, ref q);
}


/*************************************************************************
Unpacking of matrix R from the QR decomposition of a matrix A

Input parameters:
    A       -   matrices Q and R in compact form.
                Output of CMatrixQR subroutine.
    M       -   number of rows in given matrix A. M>=0.
    N       -   number of columns in given matrix A. N>=0.

Output parameters:
    R       -   matrix R, array[0..M-1, 0..N-1]. Elements below the main
                diagonal are zero. An empty array is returned when M<=0
                or N<=0.

  -- ALGLIB routine --
     17.02.2010
     Bochkanov Sergey
*************************************************************************/
public static void cmatrixqrunpackr(complex[,] a,
    int m,
    int n,
    ref complex[,] r)
{
    r = new complex[0,0];
    if( m<=0 || n<=0 )
    {
        return;
    }

    // Start from an explicitly zeroed M x N matrix.
    r = new complex[m, n];
    for(int row=0; row<m; row++)
    {
        for(int col=0; col<n; col++)
        {
            r[row,col] = 0;
        }
    }

    // Copy the upper triangle (diagonal included) of the first Min(M,N) rows.
    int filled = Math.Min(m, n);
    for(int row=0; row<filled; row++)
    {
        for(int col=row; col<n; col++)
        {
            r[row,col] = a[row,col];
        }
    }
}


/*************************************************************************
Partial unpacking of matrix Q from LQ decomposition of a complex matrix A.

COMMERCIAL EDITION OF ALGLIB:

  ! Commercial version of ALGLIB includes two important improvements of
  ! this function, which can be used from C++ and C#:
  ! * Intel MKL support (lightweight Intel MKL is shipped with ALGLIB)
  ! * multicore support
  !
  ! Intel MKL gives approximately constant (with respect to number of
  ! worker threads) acceleration factor which depends on CPU being used,
  ! problem size and "baseline" ALGLIB edition which is used for
  ! comparison.
  !
  ! Say, on SSE2-capable CPU with N=1024, HPC ALGLIB will be:
  ! * about 2-3x faster than ALGLIB for C++ without MKL
  ! * about 7-10x faster than "pure C#" edition of ALGLIB
  ! Difference in performance will be more striking on newer CPU's with
  !
support for newer SIMD instructions. Generally, MKL accelerates any
  ! problem whose size is at least 128, with best efficiency achieved for
  ! N's larger than 512.
  !
  ! Commercial edition of ALGLIB also supports multithreaded acceleration
  ! of this function. We should note that LQ decomposition is harder to
  ! parallelize than, say, matrix-matrix product - this algorithm has
  ! many internal synchronization points which cannot be avoided. However,
  ! parallelism starts to be profitable starting from N=512, achieving
  ! near-linear speedup for N=4096 or higher.
  !
  ! In order to use multicore features you have to:
  ! * use commercial version of ALGLIB
  ! * call this function with "smp_" prefix, which indicates that
  !   multicore code will be used (for multicore support)
  !
  ! We recommend you to read 'Working with commercial version' section of
  ! ALGLIB Reference Manual in order to find out how to use performance-
  ! related features provided by commercial edition of ALGLIB.

Input parameters:
    A           -   matrices Q and L in compact form.
                    Output of CMatrixLQ subroutine.
    M           -   number of rows in matrix A. M>=0.
    N           -   number of columns in matrix A. N>=0.
    Tau         -   scalar factors which are used to form Q.
                    Output of CMatrixLQ subroutine.
    QRows       -   required number of rows in matrix Q. N>=QRows>=0.

Output parameters:
    Q           -   first QRows rows of matrix Q.
                    Array whose index ranges within [0..QRows-1, 0..N-1].
                    If QRows=0, array isn't changed.

-- ALGLIB routine -- 17.02.2010 Bochkanov Sergey *************************************************************************/ public static void cmatrixlqunpackq(complex[,] a, int m, int n, complex[] tau, int qrows, ref complex[,] q) { complex[] work = new complex[0]; complex[] t = new complex[0]; complex[] taubuf = new complex[0]; int minmn = 0; int refcnt = 0; complex[,] tmpa = new complex[0,0]; complex[,] tmpt = new complex[0,0]; complex[,] tmpr = new complex[0,0]; int blockstart = 0; int blocksize = 0; int columnscount = 0; int i = 0; int j = 0; int i_ = 0; int i1_ = 0; q = new complex[0,0]; if( m<=0 || n<=0 ) { return; } // // Init // minmn = Math.Min(m, n); refcnt = Math.Min(minmn, qrows); work = new complex[Math.Max(m, n)+1]; t = new complex[Math.Max(m, n)+1]; taubuf = new complex[minmn]; tmpa = new complex[ablas.ablascomplexblocksize(a), n]; tmpt = new complex[ablas.ablascomplexblocksize(a), ablas.ablascomplexblocksize(a)]; tmpr = new complex[qrows, 2*ablas.ablascomplexblocksize(a)]; q = new complex[qrows, n]; for(i=0; i<=qrows-1; i++) { for(j=0; j<=n-1; j++) { if( i==j ) { q[i,j] = 1; } else { q[i,j] = 0; } } } // // Blocked code // blockstart = ablas.ablascomplexblocksize(a)*(refcnt/ablas.ablascomplexblocksize(a)); blocksize = refcnt-blockstart; while( blockstart>=0 ) { columnscount = n-blockstart; if( blocksize>0 ) { // // LQ decomposition of submatrix. // Matrix is copied to temporary storage to solve // some TLB issues arising from non-contiguous memory // access pattern. // ablas.cmatrixcopy(blocksize, columnscount, a, blockstart, blockstart, ref tmpa, 0, 0); i1_ = (blockstart) - (0); for(i_=0; i_<=blocksize-1;i_++) { taubuf[i_] = tau[i_+i1_]; } // // Update matrix, choose between: // a) Level 2 algorithm (when the rest of the matrix is small enough) // b) blocked algorithm, see algorithm 5 from 'A storage efficient WY // representation for products of Householder transformations', // by R. Schreiber and C. Van Loan. 
// if( qrows>=2*ablas.ablascomplexblocksize(a) ) { // // Prepare block reflector // cmatrixblockreflector(ref tmpa, ref taubuf, false, columnscount, blocksize, ref tmpt, ref work); // // Multiply the rest of A by Q'. // // Q' = E + Y*T'*Y' = E + TmpA'*TmpT'*TmpA // ablas.cmatrixgemm(qrows, blocksize, columnscount, 1.0, q, 0, blockstart, 0, tmpa, 0, 0, 2, 0.0, tmpr, 0, 0); ablas.cmatrixgemm(qrows, blocksize, blocksize, 1.0, tmpr, 0, 0, 0, tmpt, 0, 0, 2, 0.0, tmpr, 0, blocksize); ablas.cmatrixgemm(qrows, columnscount, blocksize, 1.0, tmpr, 0, blocksize, 0, tmpa, 0, 0, 0, 1.0, q, 0, blockstart); } else { // // Level 2 algorithm // for(i=blocksize-1; i>=0; i--) { i1_ = (i) - (1); for(i_=1; i_<=columnscount-i;i_++) { t[i_] = math.conj(tmpa[i,i_+i1_]); } t[1] = 1; creflections.complexapplyreflectionfromtheright(ref q, math.conj(taubuf[i]), ref t, 0, qrows-1, blockstart+i, n-1, ref work); } } } // // Advance // blockstart = blockstart-ablas.ablascomplexblocksize(a); blocksize = ablas.ablascomplexblocksize(a); } } /************************************************************************* Single-threaded stub. HPC ALGLIB replaces it by multithreaded code. *************************************************************************/ public static void _pexec_cmatrixlqunpackq(complex[,] a, int m, int n, complex[] tau, int qrows, ref complex[,] q) { cmatrixlqunpackq(a,m,n,tau,qrows,ref q); } /************************************************************************* Unpacking of matrix L from the LQ decomposition of a matrix A Input parameters: A - matrices Q and L in compact form. Output of CMatrixLQ subroutine. M - number of rows in given matrix A. M>=0. N - number of columns in given matrix A. N>=0. Output parameters: L - matrix L, array[0..M-1, 0..N-1]. 
  -- ALGLIB routine --
     17.02.2010
     Bochkanov Sergey
*************************************************************************/
public static void cmatrixlqunpackl(complex[,] a,
    int m,
    int n,
    ref complex[,] l)
{
    int i = 0;
    int k = 0;
    int i_ = 0;

    // Output is reset first; an empty array is returned for M<=0 or N<=0.
    l = new complex[0,0];

    if( m<=0 || n<=0 )
    {
        return;
    }
    l = new complex[m, n];

    // Zero the first row, then replicate it into every other row, which
    // clears the whole M x N matrix.
    for(i=0; i<=n-1; i++)
    {
        l[0,i] = 0;
    }
    for(i=1; i<=m-1; i++)
    {
        for(i_=0; i_<=n-1;i_++)
        {
            l[i,i_] = l[0,i_];
        }
    }

    // Copy the lower triangle (diagonal included): row i takes columns
    // 0..Min(i,N-1) from A.
    for(i=0; i<=m-1; i++)
    {
        k = Math.Min(i, n-1);
        for(i_=0; i_<=k;i_++)
        {
            l[i,i_] = a[i,i_];
        }
    }
}


/*************************************************************************
Base case for real QR

  -- LAPACK routine (version 3.0) --
     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
     Courant Institute, Argonne National Lab, and Rice University
     September 30, 1994.
     Sergey Bochkanov, ALGLIB project, translation from FORTRAN to
     pseudocode, 2007-2010.
*************************************************************************/
public static void rmatrixqrbasecase(ref double[,] a,
    int m,
    int n,
    ref double[] work,
    ref double[] t,
    ref double[] tau)
{
    int i = 0;
    int k = 0;
    int minmn = 0;
    double tmp = 0;
    int i_ = 0;
    int i1_ = 0;

    minmn = Math.Min(m, n);

    //
    // Test the input arguments
    //
    k = minmn;
    for(i=0; i<=k-1; i++)
    {

        //
        // Generate elementary reflector H(i) to annihilate A(i+1:m,i)
        //
        // Column i (rows i..m-1) is copied into the 1-based buffer T, the
        // reflector is generated in place, and the result is written back.
        //
        i1_ = (i) - (1);
        for(i_=1; i_<=m-i;i_++)
        {
            t[i_] = a[i_+i1_,i];
        }
        reflections.generatereflection(ref t, m-i, ref tmp);
        tau[i] = tmp;
        i1_ = (1) - (i);
        for(i_=i; i_<=m-1;i_++)
        {
            a[i_,i] = t[i_+i1_];
        }
        t[1] = 1;
        // NOTE(review): the source text is truncated at this point. The rest
        // of this routine, the definitions that followed it, and the opening
        // of the next documentation comment are missing from this copy of
        // the file; the remaining text is kept exactly as found and must be
        // restored from a pristine ALGLIB 3.9.0 source.
        if( i=N, B is the upper bidiagonal MxN matrix and is stored in the
corresponding elements of matrix A. Matrix Q is represented as a
product of elementary reflections Q = H(0)*H(1)*...*H(n-1), where
H(i) = 1-tau*v*v'. Here tau is a scalar which is stored in TauQ[i], and
vector v has the following structure: v(0:i-1)=0, v(i)=1, v(i+1:m-1) is
stored in elements A(i+1:m-1,i). Matrix P is as follows: P =
G(0)*G(1)*...*G(n-2), where G(i) = 1 - tau*u*u'.
Tau is stored in TauP[i],
u(0:i)=0, u(i+1)=1, u(i+2:n-1) is stored in elements A(i,i+2:n-1).

If M<N, B is the lower bidiagonal MxN matrix and is stored in the
corresponding elements of matrix A. Q = H(0)*H(1)*...*H(m-2), where
H(i) = 1 - tau*v*v', tau is stored in TauQ, v(0:i)=0, v(i+1)=1, v(i+2:m-1)
is stored in elements A(i+2:m-1,i). P = G(0)*G(1)*...*G(m-1),
G(i) = 1-tau*u*u', tau is stored in TauP, u(0:i-1)=0, u(i)=1, u(i+1:n-1)
is stored in A(i,i+1:n-1).

EXAMPLE:

m=6, n=5 (m > n):               m=5, n=6 (m < n):

(  d   e   u1  u1  u1 )         (  d   u1  u1  u1  u1  u1 )
(  v1  d   e   u2  u2 )         (  e   d   u2  u2  u2  u2 )
(  v1  v2  d   e   u3 )         (  v1  e   d   u3  u3  u3 )
(  v1  v2  v3  d   e  )         (  v1  v2  e   d   u4  u4 )
(  v1  v2  v3  v4  d  )         (  v1  v2  v3  e   d   u5 )
(  v1  v2  v3  v4  v5 )

Here vi and ui are vectors which form H(i) and G(i), and d and e -
are the diagonal and off-diagonal elements of matrix B.

  -- LAPACK routine (version 3.0) --
     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
     Courant Institute, Argonne National Lab, and Rice University
     September 30, 1994.
     Sergey Bochkanov, ALGLIB project, translation from FORTRAN to
     pseudocode, 2007-2010.
*************************************************************************/
public static void rmatrixbd(ref double[,] a,
    int m,
    int n,
    ref double[] tauq,
    ref double[] taup)
{
    double[] work = new double[0];
    double[] t = new double[0];
    int maxmn = 0;
    int i = 0;
    double ltau = 0;
    int i_ = 0;
    int i1_ = 0;

    // Outputs are reset first; empty arrays are returned for M<=0 or N<=0.
    tauq = new double[0];
    taup = new double[0];

    //
    // Prepare
    //
    if( n<=0 || m<=0 )
    {
        return;
    }
    maxmn = Math.Max(m, n);
    work = new double[maxmn+1];
    t = new double[maxmn+1];

    // TauQ and TauP both get Min(M,N) elements, explicitly zeroed.
    if( m>=n )
    {
        tauq = new double[n];
        taup = new double[n];
        for(i=0; i<=n-1; i++)
        {
            tauq[i] = 0.0;
            taup[i] = 0.0;
        }
    }
    else
    {
        tauq = new double[m];
        taup = new double[m];
        for(i=0; i<=m-1; i++)
        {
            tauq[i] = 0.0;
            taup[i] = 0.0;
        }
    }

    //
    // Try to use MKL code
    //
    // NOTE: buffers Work[] and T[] are used for temporary storage of diagonals;
    //       because they are present in A[], we do not use them.
    //
    if( ablasmkl.rmatrixbdmkl(a, m, n, work, t, tauq, taup) )
    {
        return;
    }

    //
    // ALGLIB code
    //
    if( m>=n )
    {

        //
        // Reduce to upper bidiagonal form
        //
        for(i=0; i<=n-1; i++)
        {

            //
            // Generate elementary reflector H(i) to annihilate A(i+1:m-1,i)
            //
            // Column i (rows i..m-1) goes through the 1-based buffer T.
            //
            i1_ = (i) - (1);
            for(i_=1; i_<=m-i;i_++)
            {
                t[i_] = a[i_+i1_,i];
            }
            reflections.generatereflection(ref t, m-i, ref ltau);
            tauq[i] = ltau;
            i1_ = (1) - (i);
            for(i_=i; i_<=m-1;i_++)
            {
                a[i_,i] = t[i_+i1_];
            }
            t[1] = 1;

            //
            // Apply H(i) to A(i:m-1,i+1:n-1) from the left
            //
            reflections.applyreflectionfromtheleft(ref a, ltau, t, i, m-1, i+1, n-1, ref work);
            // NOTE(review): the source text is truncated at this point. The
            // rest of this routine and the beginning of the documentation
            // comment of RMatrixBDUnpackQ are missing from this copy of the
            // file; the remaining text is kept exactly as found and must be
            // restored from a pristine ALGLIB 3.9.0 source.
            if( i=QColumns>=0.

Output parameters:
    Q           -   first QColumns columns of matrix Q.
                    Array[0..M-1, 0..QColumns-1]
                    If QColumns=0, the array is not modified.

  -- ALGLIB --
     2005-2010
     Bochkanov Sergey
*************************************************************************/
public static void rmatrixbdunpackq(double[,] qp,
    int m,
    int n,
    double[] tauq,
    int qcolumns,
    ref double[,] q)
{
    int i = 0;
    int j = 0;

    // Output is reset before the argument checks.
    q = new double[0,0];

    alglib.ap.assert(qcolumns<=m, "RMatrixBDUnpackQ: QColumns>M!");
    alglib.ap.assert(qcolumns>=0, "RMatrixBDUnpackQ: QColumns<0!");
    if( (m==0 || n==0) || qcolumns==0 )
    {
        return;
    }

    //
    // prepare Q
    //
    // Q starts as the leading M x QColumns part of the identity matrix.
    //
    q = new double[m, qcolumns];
    for(i=0; i<=m-1; i++)
    {
        for(j=0; j<=qcolumns-1; j++)
        {
            if( i==j )
            {
                q[i,j] = 1;
            }
            else
            {
                q[i,j] = 0;
            }
        }
    }

    //
    // Calculate
    //
    // Multiply the identity from the left by Q (no transposition).
    //
    rmatrixbdmultiplybyq(qp, m, n, tauq, ref q, m, qcolumns, false, false);
}


/*************************************************************************
Multiplication by matrix Q which reduces matrix A to bidiagonal form.

The algorithm allows pre- or post-multiply by Q or Q'.

COMMERCIAL EDITION OF ALGLIB:

  ! Commercial version of ALGLIB includes one important improvement of
  ! this function, which can be used from C++ and C#:
  ! * Intel MKL support (lightweight Intel MKL is shipped with ALGLIB)
  !
  ! Intel MKL gives approximately constant (with respect to number of
  ! worker threads) acceleration factor which depends on CPU being used,
  !
problem size and "baseline" ALGLIB edition which is used for
  ! comparison.
  !
  ! Multithreaded acceleration is NOT supported for this function.
  !
  ! We recommend you to read 'Working with commercial version' section of
  ! ALGLIB Reference Manual in order to find out how to use performance-
  ! related features provided by commercial edition of ALGLIB.

Input parameters:
    QP          -   matrices Q and P in compact form.
                    Output of RMatrixBD subroutine.
    M           -   number of rows in matrix A.
    N           -   number of columns in matrix A.
    TAUQ        -   scalar factors which are used to form Q.
                    Output of RMatrixBD subroutine.
    Z           -   multiplied matrix.
                    Array[0..ZRows-1,0..ZColumns-1]
    ZRows       -   number of rows in matrix Z. If FromTheRight=False,
                    ZRows=M, otherwise ZRows can be arbitrary.
    ZColumns    -   number of columns in matrix Z. If FromTheRight=True,
                    ZColumns=M, otherwise ZColumns can be arbitrary.
    FromTheRight -  pre- or post-multiply.
    DoTranspose -   multiply by Q or Q'.

Output parameters:
    Z           -   product of Z and Q.
                    Array[0..ZRows-1,0..ZColumns-1]
                    If ZRows=0 or ZColumns=0, the array is not modified.

-- ALGLIB -- 2005-2010 Bochkanov Sergey *************************************************************************/ public static void rmatrixbdmultiplybyq(double[,] qp, int m, int n, double[] tauq, ref double[,] z, int zrows, int zcolumns, bool fromtheright, bool dotranspose) { int i = 0; int i1 = 0; int i2 = 0; int istep = 0; double[] v = new double[0]; double[] work = new double[0]; double[] dummy = new double[0]; int mx = 0; int i_ = 0; int i1_ = 0; if( ((m<=0 || n<=0) || zrows<=0) || zcolumns<=0 ) { return; } alglib.ap.assert((fromtheright && zcolumns==m) || (!fromtheright && zrows==m), "RMatrixBDMultiplyByQ: incorrect Z size!"); // // Try to use MKL code // if( ablasmkl.rmatrixbdmultiplybymkl(qp, m, n, tauq, dummy, z, zrows, zcolumns, true, fromtheright, dotranspose) ) { return; } // // init // mx = Math.Max(m, n); mx = Math.Max(mx, zrows); mx = Math.Max(mx, zcolumns); v = new double[mx+1]; work = new double[mx+1]; if( m>=n ) { // // setup // if( fromtheright ) { i1 = 0; i2 = n-1; istep = 1; } else { i1 = n-1; i2 = 0; istep = -1; } if( dotranspose ) { i = i1; i1 = i2; i2 = i; istep = -istep; } // // Process // i = i1; do { i1_ = (i) - (1); for(i_=1; i_<=m-i;i_++) { v[i_] = qp[i_+i1_,i]; } v[1] = 1; if( fromtheright ) { reflections.applyreflectionfromtheright(ref z, tauq[i], v, 0, zrows-1, i, m-1, ref work); } else { reflections.applyreflectionfromtheleft(ref z, tauq[i], v, i, m-1, 0, zcolumns-1, ref work); } i = i+istep; } while( i!=i2+istep ); } else { // // setup // if( fromtheright ) { i1 = 0; i2 = m-2; istep = 1; } else { i1 = m-2; i2 = 0; istep = -1; } if( dotranspose ) { i = i1; i1 = i2; i2 = i; istep = -istep; } // // Process // if( m-1>0 ) { i = i1; do { i1_ = (i+1) - (1); for(i_=1; i_<=m-i-1;i_++) { v[i_] = qp[i_+i1_,i]; } v[1] = 1; if( fromtheright ) { reflections.applyreflectionfromtheright(ref z, tauq[i], v, 0, zrows-1, i+1, m-1, ref work); } else { reflections.applyreflectionfromtheleft(ref z, tauq[i], v, i+1, m-1, 0, zcolumns-1, ref work); } i = 
i+istep; } while( i!=i2+istep ); } } } /************************************************************************* Unpacking matrix P which reduces matrix A to bidiagonal form. The subroutine returns transposed matrix P. Input parameters: QP - matrices Q and P in compact form. Output of ToBidiagonal subroutine. M - number of rows in matrix A. N - number of columns in matrix A. TAUP - scalar factors which are used to form P. Output of ToBidiagonal subroutine. PTRows - required number of rows of matrix P^T. N >= PTRows >= 0. Output parameters: PT - first PTRows columns of matrix P^T Array[0..PTRows-1, 0..N-1] If PTRows=0, the array is not modified. -- ALGLIB -- 2005-2010 Bochkanov Sergey *************************************************************************/ public static void rmatrixbdunpackpt(double[,] qp, int m, int n, double[] taup, int ptrows, ref double[,] pt) { int i = 0; int j = 0; pt = new double[0,0]; alglib.ap.assert(ptrows<=n, "RMatrixBDUnpackPT: PTRows>N!"); alglib.ap.assert(ptrows>=0, "RMatrixBDUnpackPT: PTRows<0!"); if( (m==0 || n==0) || ptrows==0 ) { return; } // // prepare PT // pt = new double[ptrows, n]; for(i=0; i<=ptrows-1; i++) { for(j=0; j<=n-1; j++) { if( i==j ) { pt[i,j] = 1; } else { pt[i,j] = 0; } } } // // Calculate // rmatrixbdmultiplybyp(qp, m, n, taup, ref pt, ptrows, n, true, true); } /************************************************************************* Multiplication by matrix P which reduces matrix A to bidiagonal form. The algorithm allows pre- or post-multiply by P or P'. Input parameters: QP - matrices Q and P in compact form. Output of RMatrixBD subroutine. M - number of rows in matrix A. N - number of columns in matrix A. TAUP - scalar factors which are used to form P. Output of RMatrixBD subroutine. Z - multiplied matrix. Array whose indexes range within [0..ZRows-1,0..ZColumns-1]. ZRows - number of rows in matrix Z. If FromTheRight=False, ZRows=N, otherwise ZRows can be arbitrary. 
ZColumns - number of columns in matrix Z. If FromTheRight=True, ZColumns=N, otherwise ZColumns can be arbitrary. FromTheRight - pre- or post-multiply. DoTranspose - multiply by P or P'. Output parameters: Z - product of Z and P. Array whose indexes range within [0..ZRows-1,0..ZColumns-1]. If ZRows=0 or ZColumns=0, the array is not modified. -- ALGLIB -- 2005-2010 Bochkanov Sergey *************************************************************************/ public static void rmatrixbdmultiplybyp(double[,] qp, int m, int n, double[] taup, ref double[,] z, int zrows, int zcolumns, bool fromtheright, bool dotranspose) { int i = 0; double[] v = new double[0]; double[] work = new double[0]; double[] dummy = new double[0]; int mx = 0; int i1 = 0; int i2 = 0; int istep = 0; int i_ = 0; int i1_ = 0; if( ((m<=0 || n<=0) || zrows<=0) || zcolumns<=0 ) { return; } alglib.ap.assert((fromtheright && zcolumns==n) || (!fromtheright && zrows==n), "RMatrixBDMultiplyByP: incorrect Z size!"); // // init // mx = Math.Max(m, n); mx = Math.Max(mx, zrows); mx = Math.Max(mx, zcolumns); v = new double[mx+1]; work = new double[mx+1]; if( m>=n ) { // // setup // if( fromtheright ) { i1 = n-2; i2 = 0; istep = -1; } else { i1 = 0; i2 = n-2; istep = 1; } if( !dotranspose ) { i = i1; i1 = i2; i2 = i; istep = -istep; } // // Process // if( n-1>0 ) { i = i1; do { i1_ = (i+1) - (1); for(i_=1; i_<=n-1-i;i_++) { v[i_] = qp[i,i_+i1_]; } v[1] = 1; if( fromtheright ) { reflections.applyreflectionfromtheright(ref z, taup[i], v, 0, zrows-1, i+1, n-1, ref work); } else { reflections.applyreflectionfromtheleft(ref z, taup[i], v, i+1, n-1, 0, zcolumns-1, ref work); } i = i+istep; } while( i!=i2+istep ); } } else { // // setup // if( fromtheright ) { i1 = m-1; i2 = 0; istep = -1; } else { i1 = 0; i2 = m-1; istep = 1; } if( !dotranspose ) { i = i1; i1 = i2; i2 = i; istep = -istep; } // // Process // i = i1; do { i1_ = (i) - (1); for(i_=1; i_<=n-i;i_++) { v[i_] = qp[i,i_+i1_]; } v[1] = 1; if( fromtheright ) { 
reflections.applyreflectionfromtheright(ref z, taup[i], v, 0, zrows-1, i, n-1, ref work); } else { reflections.applyreflectionfromtheleft(ref z, taup[i], v, i, n-1, 0, zcolumns-1, ref work); } i = i+istep; } while( i!=i2+istep ); } } /************************************************************************* Unpacking of the main and secondary diagonals of bidiagonal decomposition of matrix A. Input parameters: B - output of RMatrixBD subroutine. M - number of rows in matrix B. N - number of columns in matrix B. Output parameters: IsUpper - True, if the matrix is upper bidiagonal. otherwise IsUpper is False. D - the main diagonal. Array whose index ranges within [0..Min(M,N)-1]. E - the secondary diagonal (upper or lower, depending on the value of IsUpper). Array index ranges within [0..Min(M,N)-1], the last element is not used. -- ALGLIB -- 2005-2010 Bochkanov Sergey *************************************************************************/ public static void rmatrixbdunpackdiagonals(double[,] b, int m, int n, ref bool isupper, ref double[] d, ref double[] e) { int i = 0; isupper = new bool(); d = new double[0]; e = new double[0]; isupper = m>=n; if( m<=0 || n<=0 ) { return; } if( isupper ) { d = new double[n]; e = new double[n]; for(i=0; i<=n-2; i++) { d[i] = b[i,i]; e[i] = b[i,i+1]; } d[n-1] = b[n-1,n-1]; } else { d = new double[m]; e = new double[m]; for(i=0; i<=m-2; i++) { d[i] = b[i,i]; e[i] = b[i+1,i]; } d[m-1] = b[m-1,m-1]; } } /************************************************************************* Reduction of a square matrix to upper Hessenberg form: Q'*A*Q = H, where Q is an orthogonal matrix, H - Hessenberg matrix. COMMERCIAL EDITION OF ALGLIB: ! Commercial version of ALGLIB includes one important improvement of ! this function, which can be used from C++ and C#: ! * Intel MKL support (lightweight Intel MKL is shipped with ALGLIB) ! ! Intel MKL gives approximately constant (with respect to number of ! 
worker threads) acceleration factor which depends on CPU being used,
  ! problem size and "baseline" ALGLIB edition which is used for
  ! comparison.
  !
  ! Generally, commercial ALGLIB is several times faster than open-source
  ! generic C edition, and many times faster than open-source C# edition.
  !
  ! Multithreaded acceleration is NOT supported for this function.
  !
  ! We recommend you to read 'Working with commercial version' section of
  ! ALGLIB Reference Manual in order to find out how to use performance-
  ! related features provided by commercial edition of ALGLIB.

Input parameters:
    A       -   matrix A with elements [0..N-1, 0..N-1]
    N       -   size of matrix A.

Output parameters:
    A       -   matrices H and Q in compact form (see below).
    Tau     -   array of scalar factors which are used to form matrix Q.
                Array whose index ranges within [0..N-2]

Matrix H is located on the main diagonal, on the lower secondary diagonal
and above the main diagonal of matrix A. The elements which are used to
form matrix Q are situated in array Tau and below the lower secondary
diagonal of matrix A as follows:

Matrix Q is represented as a product of elementary reflections

Q = H(0)*H(1)*...*H(n-2),

where each H(i) is given by

H(i) = 1 - tau * v * (v^T)

where tau is a scalar stored in Tau[I]; v - is a real vector,
so that v(0:i) = 0, v(i+1) = 1, v(i+2:n-1) stored in A(i+2:n-1,i).

  -- LAPACK routine (version 3.0) --
     Univ. of Tennessee, Univ.
of California Berkeley, NAG Ltd., Courant Institute, Argonne National Lab, and Rice University October 31, 1992 *************************************************************************/ public static void rmatrixhessenberg(ref double[,] a, int n, ref double[] tau) { int i = 0; double v = 0; double[] t = new double[0]; double[] work = new double[0]; int i_ = 0; int i1_ = 0; tau = new double[0]; alglib.ap.assert(n>=0, "RMatrixHessenberg: incorrect N!"); // // Quick return if possible // if( n<=1 ) { return; } // // Allocate place // tau = new double[n-2+1]; t = new double[n+1]; work = new double[n-1+1]; // // MKL version // if( ablasmkl.rmatrixhessenbergmkl(a, n, tau) ) { return; } // // ALGLIB version // for(i=0; i<=n-2; i++) { // // Compute elementary reflector H(i) to annihilate A(i+2:ihi,i) // i1_ = (i+1) - (1); for(i_=1; i_<=n-i-1;i_++) { t[i_] = a[i_+i1_,i]; } reflections.generatereflection(ref t, n-i-1, ref v); i1_ = (1) - (i+1); for(i_=i+1; i_<=n-1;i_++) { a[i_,i] = t[i_+i1_]; } tau[i] = v; t[1] = 1; // // Apply H(i) to A(1:ihi,i+1:ihi) from the right // reflections.applyreflectionfromtheright(ref a, v, t, 0, n-1, i+1, n-1, ref work); // // Apply H(i) to A(i+1:ihi,i+1:n) from the left // reflections.applyreflectionfromtheleft(ref a, v, t, i+1, n-1, i+1, n-1, ref work); } } /************************************************************************* Unpacking matrix Q which reduces matrix A to upper Hessenberg form COMMERCIAL EDITION OF ALGLIB: ! Commercial version of ALGLIB includes one important improvement of ! this function, which can be used from C++ and C#: ! * Intel MKL support (lightweight Intel MKL is shipped with ALGLIB) ! ! Intel MKL gives approximately constant (with respect to number of ! worker threads) acceleration factor which depends on CPU being used, ! problem size and "baseline" ALGLIB edition which is used for ! comparison. ! ! Generally, commercial ALGLIB is several times faster than open-source ! 
generic C edition, and many times faster than open-source C# edition.
  !
  ! Multithreaded acceleration is NOT supported for this function.
  !
  ! We recommend you to read 'Working with commercial version' section of
  ! ALGLIB Reference Manual in order to find out how to use performance-
  ! related features provided by commercial edition of ALGLIB.

Input parameters:
    A       -   output of RMatrixHessenberg subroutine.
    N       -   size of matrix A.
    Tau     -   scalar factors which are used to form Q.
                Output of RMatrixHessenberg subroutine.

Output parameters:
    Q       -   matrix Q.
                Array whose indexes range within [0..N-1, 0..N-1].

  -- ALGLIB --
     2005-2010
     Bochkanov Sergey
*************************************************************************/
public static void rmatrixhessenbergunpackq(double[,] a,
    int n,
    double[] tau,
    ref double[,] q)
{
    q = new double[0,0];
    if( n==0 )
    {
        return;
    }

    //
    // Q starts as the N x N identity matrix.
    //
    q = new double[n, n];
    double[] vec = new double[n];
    double[] work = new double[n];
    for(int row=0; row<n; row++)
    {
        for(int col=0; col<n; col++)
        {
            q[row,col] = row==col ? 1 : 0;
        }
    }

    //
    // MKL version
    //
    if( ablasmkl.rmatrixhessenbergunpackqmkl(a, n, tau, q) )
    {
        return;
    }

    //
    // ALGLIB version: accumulate H(0), H(1), ..., H(N-2) from the right.
    // Reflector idx lives below the sub-diagonal of column idx; its
    // leading element is 1.
    //
    for(int idx=0; idx<n-1; idx++)
    {
        int len = n-idx-1;
        for(int k=1; k<=len; k++)
        {
            vec[k] = a[idx+k,idx];
        }
        vec[1] = 1;
        reflections.applyreflectionfromtheright(ref q, tau[idx], vec, 0, n-1, idx+1, n-1, ref work);
    }
}


/*************************************************************************
Unpacking matrix H (the result of matrix A reduction to upper Hessenberg form)

Input parameters:
    A   -   output of RMatrixHessenberg subroutine.
    N   -   size of matrix A.

Output parameters:
    H   -   matrix H. Array whose indexes range within [0..N-1, 0..N-1].

-- ALGLIB -- 2005-2010 Bochkanov Sergey *************************************************************************/ public static void rmatrixhessenbergunpackh(double[,] a, int n, ref double[,] h) { int i = 0; int j = 0; double[] v = new double[0]; double[] work = new double[0]; int i_ = 0; h = new double[0,0]; if( n==0 ) { return; } h = new double[n-1+1, n-1+1]; for(i=0; i<=n-1; i++) { for(j=0; j<=i-2; j++) { h[i,j] = 0; } j = Math.Max(0, i-1); for(i_=j; i_<=n-1;i_++) { h[i,i_] = a[i,i_]; } } } /************************************************************************* Reduction of a symmetric matrix which is given by its higher or lower triangular part to a tridiagonal matrix using orthogonal similarity transformation: Q'*A*Q=T. COMMERCIAL EDITION OF ALGLIB: ! Commercial version of ALGLIB includes one important improvement of ! this function, which can be used from C++ and C#: ! * Intel MKL support (lightweight Intel MKL is shipped with ALGLIB) ! ! Intel MKL gives approximately constant (with respect to number of ! worker threads) acceleration factor which depends on CPU being used, ! problem size and "baseline" ALGLIB edition which is used for ! comparison. ! ! Generally, commercial ALGLIB is several times faster than open-source ! generic C edition, and many times faster than open-source C# edition. ! ! Multithreaded acceleration is NOT supported for this function. ! ! We recommend you to read 'Working with commercial version' section of ! ALGLIB Reference Manual in order to find out how to use performance- ! related features provided by commercial edition of ALGLIB. Input parameters: A - matrix to be transformed array with elements [0..N-1, 0..N-1]. N - size of matrix A. IsUpper - storage format. If IsUpper = True, then matrix A is given by its upper triangle, and the lower triangle is not used and not modified by the algorithm, and vice versa if IsUpper = False. 
Output parameters:
    A       -   matrices T and Q in compact form (see below)
    Tau     -   array of factors which are forming matrices H(i)
                array with elements [0..N-2].
    D       -   main diagonal of symmetric matrix T.
                array with elements [0..N-1].
    E       -   secondary diagonal of symmetric matrix T.
                array with elements [0..N-2].


  If IsUpper=True, the matrix Q is represented as a product of elementary
  reflectors

     Q = H(n-2) . . . H(1) H(0).

  Each H(i) has the form

     H(i) = I - tau * v * v'

  where tau is a real scalar, and v is a real vector with
  v(i+1:n-1) = 0, v(i) = 1, v(0:i-1) is stored on exit in
  A(0:i-1,i+1), and tau in TAU(i).

  If IsUpper=False, the matrix Q is represented as a product of elementary
  reflectors

     Q = H(0) H(1) . . . H(n-2).

  Each H(i) has the form

     H(i) = I - tau * v * v'

  where tau is a real scalar, and v is a real vector with
  v(0:i) = 0, v(i+1) = 1, v(i+2:n-1) is stored on exit in A(i+2:n-1,i),
  and tau in TAU(i).

  The contents of A on exit are illustrated by the following examples
  with n = 5:

  if UPLO = 'U':                       if UPLO = 'L':

    (  d   e   v1  v2  v3 )              (  d                  )
    (      d   e   v2  v3 )              (  e   d              )
    (          d   e   v3 )              (  v0  e   d          )
    (              d   e  )              (  v0  v1  e   d      )
    (                  d  )              (  v0  v1  v2  e   d  )

  where d and e denote diagonal and off-diagonal elements of T, and vi
  denotes an element of the vector defining H(i).

  -- LAPACK routine (version 3.0) --
     Univ. of Tennessee, Univ.
     of California Berkeley, NAG Ltd.,
     Courant Institute, Argonne National Lab, and Rice University
     October 31, 1992
*************************************************************************/
public static void smatrixtd(ref double[,] a,
    int n,
    bool isupper,
    ref double[] tau,
    ref double[] d,
    ref double[] e)
{
    int i = 0;
    double alpha = 0;
    double taui = 0;
    double v = 0;
    double[] t = new double[0];
    double[] t2 = new double[0];
    double[] t3 = new double[0];
    int i_ = 0;
    int i1_ = 0;

    // Outputs are reset first; empty arrays are returned for N<=0.
    tau = new double[0];
    d = new double[0];
    e = new double[0];

    if( n<=0 )
    {
        return;
    }

    // T, T2 and T3 are 1-based scratch vectors. Tau and E get N-1 elements
    // (they stay empty for N=1); D gets N elements.
    t = new double[n+1];
    t2 = new double[n+1];
    t3 = new double[n+1];
    if( n>1 )
    {
        tau = new double[n-2+1];
    }
    d = new double[n-1+1];
    if( n>1 )
    {
        e = new double[n-2+1];
    }

    //
    // Try to use MKL
    //
    if( ablasmkl.smatrixtdmkl(a, n, isupper, tau, d, e) )
    {
        return;
    }

    //
    // ALGLIB version
    //
    if( isupper )
    {

        //
        // Reduce the upper triangle of A
        //
        for(i=n-2; i>=0; i--)
        {

            //
            // Generate elementary reflector H() = E - tau * v * v'
            //
            // T[1] holds A[i,i+1]; T[2..i+1] hold A[0..i-1,i+1]. After the
            // reflector is generated the same elements are written back.
            //
            if( i>=1 )
            {
                i1_ = (0) - (2);
                for(i_=2; i_<=i+1;i_++)
                {
                    t[i_] = a[i_+i1_,i+1];
                }
            }
            t[1] = a[i,i+1];
            reflections.generatereflection(ref t, i+1, ref taui);
            if( i>=1 )
            {
                i1_ = (2) - (0);
                for(i_=0; i_<=i-1;i_++)
                {
                    a[i_,i+1] = t[i_+i1_];
                }
            }
            a[i,i+1] = t[1];
            e[i] = a[i,i+1];
            if( (double)(taui)!=(double)(0) )
            {

                //
                // Apply H from both sides to A
                //
                // A[i,i+1] is temporarily set to 1 so that column i+1
                // (rows 0..i) holds the complete vector v; it is restored
                // from E[i] at the end of this branch.
                //
                a[i,i+1] = 1;

                //
                // Compute  x := tau * A * v  storing x in TAU
                //
                // Tau[0..i] is used as scratch here; Tau[i] is overwritten
                // with its final value after this branch.
                //
                i1_ = (0) - (1);
                for(i_=1; i_<=i+1;i_++)
                {
                    t[i_] = a[i_+i1_,i+1];
                }
                sblas.symmetricmatrixvectormultiply(a, isupper, 0, i, t, taui, ref t3);
                i1_ = (1) - (0);
                for(i_=0; i_<=i;i_++)
                {
                    tau[i_] = t3[i_+i1_];
                }

                //
                // Compute  w := x - 1/2 * tau * (x'*v) * v
                //
                v = 0.0;
                for(i_=0; i_<=i;i_++)
                {
                    v += tau[i_]*a[i_,i+1];
                }
                alpha = -(0.5*taui*v);
                for(i_=0; i_<=i;i_++)
                {
                    tau[i_] = tau[i_] + alpha*a[i_,i+1];
                }

                //
                // Apply the transformation as a rank-2 update:
                //    A := A - v * w' - w * v'
                //
                // v and w are copied into the 1-based buffers T and T3.
                //
                i1_ = (0) - (1);
                for(i_=1; i_<=i+1;i_++)
                {
                    t[i_] = a[i_+i1_,i+1];
                }
                i1_ = (0) - (1);
                for(i_=1; i_<=i+1;i_++)
                {
                    t3[i_] = tau[i_+i1_];
                }
                sblas.symmetricrank2update(ref a, isupper, 0, i, t, t3, ref t2, -1);
                a[i,i+1] = e[i];
            }
            d[i+1] = a[i+1,i+1];
            tau[i] = taui;
        }
        d[0] = a[0,0];
    }
    else
    {

        //
        // Reduce the lower triangle of A
        //
        for(i=0; i<=n-2; i++)
        {

            //
            // Generate elementary reflector H = E - tau * v * v'
            //
            // T[1..n-i-1] hold A[i+1..n-1,i]; the generated reflector is
            // written back to the same elements.
            //
            i1_ = (i+1) - (1);
            for(i_=1; i_<=n-i-1;i_++)
            {
                t[i_] = a[i_+i1_,i];
            }
            reflections.generatereflection(ref t, n-i-1, ref taui);
            i1_ = (1) - (i+1);
            for(i_=i+1; i_<=n-1;i_++)
            {
                a[i_,i] = t[i_+i1_];
            }
            e[i] = a[i+1,i];
            if( (double)(taui)!=(double)(0) )
            {

                //
                // Apply H from both sides to A
                //
                // A[i+1,i] is temporarily set to 1 so that column i
                // (rows i+1..n-1) holds the complete vector v; it is
                // restored from E[i] at the end of this branch.
                //
                a[i+1,i] = 1;

                //
                // Compute  x := tau * A * v  storing x in TAU
                //
                // Tau[i..n-2] is used as scratch here; Tau[i] is
                // overwritten with its final value after this branch.
                //
                i1_ = (i+1) - (1);
                for(i_=1; i_<=n-i-1;i_++)
                {
                    t[i_] = a[i_+i1_,i];
                }
                sblas.symmetricmatrixvectormultiply(a, isupper, i+1, n-1, t, taui, ref t2);
                i1_ = (1) - (i);
                for(i_=i; i_<=n-2;i_++)
                {
                    tau[i_] = t2[i_+i1_];
                }

                //
                // Compute  w := x - 1/2 * tau * (x'*v) * v
                //
                i1_ = (i+1)-(i);
                v = 0.0;
                for(i_=i; i_<=n-2;i_++)
                {
                    v += tau[i_]*a[i_+i1_,i];
                }
                alpha = -(0.5*taui*v);
                i1_ = (i+1) - (i);
                for(i_=i; i_<=n-2;i_++)
                {
                    tau[i_] = tau[i_] + alpha*a[i_+i1_,i];
                }

                //
                // Apply the transformation as a rank-2 update:
                //     A := A - v * w' - w * v'
                //
                // v and w are copied into the 1-based buffers T and T2.
                //
                i1_ = (i+1) - (1);
                for(i_=1; i_<=n-i-1;i_++)
                {
                    t[i_] = a[i_+i1_,i];
                }
                i1_ = (i) - (1);
                for(i_=1; i_<=n-i-1;i_++)
                {
                    t2[i_] = tau[i_+i1_];
                }
                sblas.symmetricrank2update(ref a, isupper, i+1, n-1, t, t2, ref t3, -1);
                a[i+1,i] = e[i];
            }
            d[i] = a[i,i];
            tau[i] = taui;
        }
        d[n-1] = a[n-1,n-1];
    }
}


/*************************************************************************
Unpacking matrix Q which reduces symmetric matrix to a tridiagonal
form.

COMMERCIAL EDITION OF ALGLIB:

  ! Commercial version of ALGLIB includes one important improvement of
  ! this function, which can be used from C++ and C#:
  ! * Intel MKL support (lightweight Intel MKL is shipped with ALGLIB)
  !
  ! Intel MKL gives approximately constant (with respect to number of
  ! worker threads) acceleration factor which depends on CPU being used,
  !
problem size and "baseline" ALGLIB edition which is used for
  ! comparison.
  !
  ! Generally, commercial ALGLIB is several times faster than open-source
  ! generic C edition, and many times faster than open-source C# edition.
  !
  ! Multithreaded acceleration is NOT supported for this function.
  !
  ! We recommend you to read 'Working with commercial version' section of
  ! ALGLIB Reference Manual in order to find out how to use performance-
  ! related features provided by commercial edition of ALGLIB.

Input parameters:
    A       -   the result of a SMatrixTD subroutine
    N       -   size of matrix A.
    IsUpper -   storage format (a parameter of SMatrixTD subroutine)
    Tau     -   the result of a SMatrixTD subroutine

Output parameters:
    Q       -   transformation matrix.
                array with elements [0..N-1, 0..N-1].

  -- ALGLIB --
     Copyright 2005-2010 by Bochkanov Sergey
*************************************************************************/
public static void smatrixtdunpackq(double[,] a,
    int n,
    bool isupper,
    double[] tau,
    ref double[,] q)
{
    // An empty problem yields an empty Q.
    q = new double[0,0];
    if( n==0 )
    {
        return;
    }

    // Q starts as the N x N identity; vec is filled from index 1.
    q = new double[n,n];
    double[] vec = new double[n+1];
    double[] work = new double[n];
    for(int row=0; row<n; row++)
    {
        for(int column=0; column<n; column++)
        {
            q[row,column] = row==column ? 1 : 0;
        }
    }

    // Let the MKL-backed kernel unpack Q when it reports success.
    if( ablasmkl.smatrixtdunpackqmkl(a, n, isupper, tau, q) )
    {
        return;
    }

    if( isupper )
    {
        // Reflector i is read from a[0..i,i+1] and applied to rows 0..i.
        for(int i=0; i<=n-2; i++)
        {
            for(int k=0; k<=i; k++)
            {
                vec[k+1] = a[k,i+1];
            }
            vec[i+1] = 1;
            reflections.applyreflectionfromtheleft(ref q, tau[i], vec, 0, i, 0, n-1, ref work);
        }
    }
    else
    {
        // Reflector i is read from a[i+1..n-1,i] and applied to rows
        // i+1..n-1, walking from the last reflector back to the first.
        for(int i=n-2; i>=0; i--)
        {
            for(int k=1; k<=n-i-1; k++)
            {
                vec[k] = a[i+k,i];
            }
            vec[1] = 1;
            reflections.applyreflectionfromtheleft(ref q, tau[i], vec, i+1, n-1, 0, n-1, ref work);
        }
    }
}


/*************************************************************************
Reduction of
a Hermitian matrix which is given by its upper or lower triangular part to
a real tridiagonal matrix using unitary similarity transformation:
Q'*A*Q = T.

COMMERCIAL EDITION OF ALGLIB:

  ! Commercial version of ALGLIB includes one important improvement of
  ! this function, which can be used from C++ and C#:
  ! * Intel MKL support (lightweight Intel MKL is shipped with ALGLIB)
  !
  ! Intel MKL gives approximately constant (with respect to number of
  ! worker threads) acceleration factor which depends on CPU being used,
  ! problem size and "baseline" ALGLIB edition which is used for
  ! comparison.
  !
  ! Generally, commercial ALGLIB is several times faster than open-source
  ! generic C edition, and many times faster than open-source C# edition.
  !
  ! Multithreaded acceleration is NOT supported for this function.
  !
  ! We recommend you to read 'Working with commercial version' section of
  ! ALGLIB Reference Manual in order to find out how to use performance-
  ! related features provided by commercial edition of ALGLIB.

Input parameters:
    A       -   matrix to be transformed
                array with elements [0..N-1, 0..N-1].
    N       -   size of matrix A.
    IsUpper -   storage format. If IsUpper = True, then matrix A is given
                by its upper triangle, and the lower triangle is not used
                and not modified by the algorithm, and vice versa
                if IsUpper = False.

Output parameters:
    A       -   matrices T and Q in  compact form (see below)
    Tau     -   array of factors which are forming matrices H(i)
                array with elements [0..N-2].
    D       -   main diagonal of real symmetric matrix T.
                array with elements [0..N-1].
    E       -   secondary diagonal of real symmetric matrix T.
                array with elements [0..N-2].


  If IsUpper=True, the matrix Q is represented as a product of elementary
  reflectors

     Q = H(n-2) . . . H(1) H(0).

  Each H(i) has the form

     H(i) = I - tau * v * v'

  where tau is a complex scalar, and v is a complex vector with
  v(i+1:n-1) = 0, v(i) = 1, v(0:i-1) is stored on exit in
  A(0:i-1,i+1), and tau in TAU(i).

If IsUpper=False, the matrix Q is represented as a product of elementary
  reflectors

     Q = H(0) H(1) . . . H(n-2).

  Each H(i) has the form

     H(i) = I - tau * v * v'

  where tau is a complex scalar, and v is a complex vector with
  v(0:i) = 0, v(i+1) = 1, v(i+2:n-1) is stored on exit in A(i+2:n-1,i),
  and tau in TAU(i).

  The contents of A on exit are illustrated by the following examples
  with n = 5:

  if UPLO = 'U':                       if UPLO = 'L':

    (  d   e   v1  v2  v3 )              (  d                  )
    (      d   e   v2  v3 )              (  e   d              )
    (          d   e   v3 )              (  v0  e   d          )
    (              d   e  )              (  v0  v1  e   d      )
    (                  d  )              (  v0  v1  v2  e   d  )

  where d and e denote diagonal and off-diagonal elements of T, and vi
  denotes an element of the vector defining H(i).

  -- LAPACK routine (version 3.0) --
     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
     Courant Institute, Argonne National Lab, and Rice University
     October 31, 1992
*************************************************************************/
public static void hmatrixtd(ref complex[,] a,
    int n,
    bool isupper,
    ref complex[] tau,
    ref double[] d,
    ref double[] e)
{
    // Every output starts out empty, so a non-positive order hands back
    // zero-length arrays.
    tau = new complex[0];
    d = new double[0];
    e = new double[0];
    if( n<=0 )
    {
        return;
    }

    // The diagonal of the input must be purely real.
    for(int k=0; k<n; k++)
    {
        alglib.ap.assert((double)(a[k,k].y)==(double)(0));
    }

    // T has N diagonal entries; the off-diagonal and the reflector factors
    // exist only for N>1 and have N-1 entries each.
    if( n>1 )
    {
        tau = new complex[n-1];
        e = new double[n-1];
    }
    d = new double[n];

    // Scratch vectors passed to the reflection / BLAS helpers; they are
    // filled starting at index 1.
    complex[] refl = new complex[n];
    complex[] bufA = new complex[n];
    complex[] bufB = new complex[n];
    complex taui = 0;

    // Let the MKL-backed kernel do the whole job when it reports success.
    if( ablasmkl.hmatrixtdmkl(a, n, isupper, tau, d, e) )
    {
        return;
    }

    if( isupper )
    {
        // Upper triangle: step i (from N-2 down to 0) works on column i+1.
        // The last diagonal entry is stripped of its imaginary part first.
        a[n-1,n-1] = a[n-1,n-1].x;
        for(int i=n-2; i>=0; i--)
        {
            int col = i+1;

            // Build the reflector from a[0..i,col]: the element next to the
            // diagonal goes to slot 1, the entries above it to slots 2..i+1.
            refl[1] = a[i,col];
            for(int k=0; k<i; k++)
            {
                refl[k+2] = a[k,col];
            }
            creflections.complexgeneratereflection(ref refl, col, ref taui);
            for(int k=0; k<i; k++)
            {
                a[k,col] = refl[k+2];
            }
            e[i] = refl[1].x;

            if( taui!=0 )
            {
                // Apply H from both sides to A; v temporarily gets its
                // implicit unit component stored explicitly.
                a[i,col] = 1;

                // x := taui * A * v, kept in tau[0..i]
                for(int k=0; k<=i; k++)
                {
                    refl[k+1] = a[k,col];
                }
                hblas.hermitianmatrixvectormultiply(a, isupper, 0, i, refl, taui, ref bufA);
                for(int k=0; k<=i; k++)
                {
                    tau[k] = bufA[k+1];
                }

                // w := x - 1/2 * taui * (x'*v) * v
                complex dot = 0.0;
                for(int k=0; k<=i; k++)
                {
                    dot += math.conj(tau[k])*a[k,col];
                }
                complex alpha = -(0.5*taui*dot);
                for(int k=0; k<=i; k++)
                {
                    tau[k] = tau[k] + alpha*a[k,col];
                }

                // Rank-2 update: A := A - v * w' - w * v'
                for(int k=0; k<=i; k++)
                {
                    refl[k+1] = a[k,col];
                    bufB[k+1] = tau[k];
                }
                hblas.hermitianrank2update(ref a, isupper, 0, i, refl, bufB, ref bufA, -1);
            }
            else
            {
                // No reflection to apply: only drop the imaginary part of
                // the diagonal entry.
                a[i,i] = a[i,i].x;
            }
            a[i,col] = e[i];
            d[col] = a[col,col].x;
            tau[i] = taui;
        }
        d[0] = a[0,0].x;
    }
    else
    {
        // Lower triangle: step i (from 0 up to N-2) works on column i.
        // The first diagonal entry is stripped of its imaginary part first.
        a[0,0] = a[0,0].x;
        for(int i=0; i<=n-2; i++)
        {
            int len = n-i-1;

            // Build the reflector from a[i+1..n-1,i].
            for(int k=1; k<=len; k++)
            {
                refl[k] = a[i+k,i];
            }
            creflections.complexgeneratereflection(ref refl, len, ref taui);
            for(int k=1; k<=len; k++)
            {
                a[i+k,i] = refl[k];
            }
            e[i] = a[i+1,i].x;

            if( taui!=0 )
            {
                // Apply H(i) from both sides to A(i+1:n,i+1:n).
                a[i+1,i] = 1;

                // x := taui * A * v, kept in tau[i..n-2]
                for(int k=1; k<=len; k++)
                {
                    refl[k] = a[i+k,i];
                }
                hblas.hermitianmatrixvectormultiply(a, isupper, i+1, n-1, refl, taui, ref bufA);
                for(int k=1; k<=len; k++)
                {
                    tau[i+k-1] = bufA[k];
                }

                // w := x - 1/2 * taui * (x'*v) * v
                complex dot = 0.0;
                for(int k=1; k<=len; k++)
                {
                    dot += math.conj(tau[i+k-1])*a[i+k,i];
                }
                complex alpha = -(0.5*taui*dot);
                for(int k=1; k<=len; k++)
                {
                    tau[i+k-1] = tau[i+k-1] + alpha*a[i+k,i];
                }

                // Rank-2 update: A := A - v * w' - w * v'
                for(int k=1; k<=len; k++)
                {
                    refl[k] = a[i+k,i];
                    bufA[k] = tau[i+k-1];
                }
                hblas.hermitianrank2update(ref a, isupper, i+1, n-1, refl, bufA, ref bufB, -1);
            }
            else
            {
                // No reflection to apply: only drop the imaginary part of
                // the next diagonal entry.
                a[i+1,i+1] = a[i+1,i+1].x;
            }
            a[i+1,i] = e[i];
            d[i] = a[i,i].x;
            tau[i] = taui;
        }
        d[n-1] = a[n-1,n-1].x;
    }
}


/*************************************************************************
Unpacking matrix Q which reduces a Hermitian matrix to a real  tridiagonal
form.

COMMERCIAL EDITION OF ALGLIB:

  ! Commercial version of ALGLIB includes one important improvement of
  ! this function, which can be used from C++ and C#:
  ! * Intel MKL support (lightweight Intel MKL is shipped with ALGLIB)
  !
  ! Intel MKL gives approximately constant (with respect to number of
  ! worker threads) acceleration factor which depends on CPU being used,
  ! problem size and "baseline" ALGLIB edition which is used for
  ! comparison.
  !
  ! Generally, commercial ALGLIB is several times faster than open-source
  ! generic C edition, and many times faster than open-source C# edition.
  !
  ! Multithreaded acceleration is NOT supported for this function.
  !
  ! We recommend you to read 'Working with commercial version' section of
  ! ALGLIB Reference Manual in order to find out how to use performance-
  ! related features provided by commercial edition of ALGLIB.

Input parameters:
    A       -   the result of a HMatrixTD subroutine
    N       -   size of matrix A.
    IsUpper -   storage format (a parameter of HMatrixTD subroutine)
    Tau     -   the result of a HMatrixTD subroutine

Output parameters:
    Q       -   transformation matrix.
                array with elements [0..N-1, 0..N-1].

-- ALGLIB --
     Copyright 2005-2010 by Bochkanov Sergey
*************************************************************************/
public static void hmatrixtdunpackq(complex[,] a,
    int n,
    bool isupper,
    complex[] tau,
    ref complex[,] q)
{
    // An empty problem yields an empty Q.
    q = new complex[0,0];
    if( n==0 )
    {
        return;
    }

    // Allocate the result and the scratch vectors (vec is filled from
    // index 1).
    q = new complex[n,n];
    complex[] vec = new complex[n+1];
    complex[] work = new complex[n];

    // Let the MKL-backed kernel unpack Q when it reports success. Note that
    // Q has not been set to the identity at this point.
    if( ablasmkl.hmatrixtdunpackqmkl(a, n, isupper, tau, q) )
    {
        return;
    }

    // Start from the identity matrix.
    for(int row=0; row<n; row++)
    {
        for(int column=0; column<n; column++)
        {
            q[row,column] = row==column ? 1 : 0;
        }
    }

    if( isupper )
    {
        // Reflector i is read from a[0..i,i+1] and applied to rows 0..i.
        for(int i=0; i<=n-2; i++)
        {
            for(int k=0; k<=i; k++)
            {
                vec[k+1] = a[k,i+1];
            }
            vec[i+1] = 1;
            creflections.complexapplyreflectionfromtheleft(ref q, tau[i], vec, 0, i, 0, n-1, ref work);
        }
    }
    else
    {
        // Reflector i is read from a[i+1..n-1,i] and applied to rows
        // i+1..n-1, walking from the last reflector back to the first.
        for(int i=n-2; i>=0; i--)
        {
            for(int k=1; k<=n-i-1; k++)
            {
                vec[k] = a[i+k,i];
            }
            vec[1] = 1;
            creflections.complexapplyreflectionfromtheleft(ref q, tau[i], vec, i+1, n-1, 0, n-1, ref work);
        }
    }
}


/*************************************************************************
Base case for complex QR

  -- LAPACK routine (version 3.0) --
     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
     Courant Institute, Argonne National Lab, and Rice University
     September 30, 1994.
     Sergey Bochkanov, ALGLIB project, translation from FORTRAN to
     pseudocode, 2007-2010.
*************************************************************************/ private static void cmatrixqrbasecase(ref complex[,] a, int m, int n, ref complex[] work, ref complex[] t, ref complex[] tau) { int i = 0; int k = 0; int mmi = 0; int minmn = 0; complex tmp = 0; int i_ = 0; int i1_ = 0; minmn = Math.Min(m, n); if( minmn<=0 ) { return; } // // Test the input arguments // k = Math.Min(m, n); for(i=0; i<=k-1; i++) { // // Generate elementary reflector H(i) to annihilate A(i+1:m,i) // mmi = m-i; i1_ = (i) - (1); for(i_=1; i_<=mmi;i_++) { t[i_] = a[i_+i1_,i]; } creflections.complexgeneratereflection(ref t, mmi, ref tmp); tau[i] = tmp; i1_ = (1) - (i); for(i_=i; i_<=m-1;i_++) { a[i_,i] = t[i_+i1_]; } t[1] = 1; if( i0, contains matrix U*Q. VT - if NCVT>0, contains matrix (P^T)*VT. C - if NCC>0, contains matrix Q'*C. Result: True, if the algorithm has converged. False, if the algorithm hasn't converged (rare case). NOTE: multiplication U*Q is performed by means of transposition to internal buffer, multiplication and backward transposition. It helps to avoid costly columnwise operations and speed-up algorithm. Additional information: The type of convergence is controlled by the internal parameter TOL. If the parameter is greater than 0, the singular values will have relative accuracy TOL. If TOL<0, the singular values will have absolute accuracy ABS(TOL)*norm(B). By default, |TOL| falls within the range of 10*Epsilon and 100*Epsilon, where Epsilon is the machine precision. It is not recommended to use TOL less than 10*Epsilon since this will considerably slow down the algorithm and may not lead to error decreasing. History: * 31 March, 2007. changed MAXITR from 6 to 12. -- LAPACK routine (version 3.0) -- Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., Courant Institute, Argonne National Lab, and Rice University October 31, 1999. 
*************************************************************************/
//
// Singular value decomposition of a bidiagonal matrix given by 0-based
// arrays D (main diagonal, N entries) and E (secondary diagonal, N-1
// entries). The MKL-backed kernel is tried first; otherwise D and E are
// copied into 1-based work arrays and passed to the internal solver, and
// the resulting values are copied back into D.
//
// Returns the convergence flag reported by whichever solver ran. An empty
// problem (N=0) returns True without touching any argument.
//
public static bool rmatrixbdsvd(ref double[] d,
    double[] e,
    int n,
    bool isupper,
    bool isfractionalaccuracyrequired,
    ref double[,] u,
    int nru,
    ref double[,] c,
    int ncc,
    ref double[,] vt,
    int ncvt)
{
    // Nothing to decompose. Without this guard the zero padding of EN below
    // would write to index -1 of an empty array and throw. True is also what
    // the internal solver returns for N=0.
    if( n==0 )
    {
        return true;
    }

    bool result = false;

    //
    // Try to use MKL: it takes the secondary diagonal padded with a trailing
    // zero to length N. E itself is only read here, so no defensive copy of
    // it is needed.
    //
    double[] en = new double[n];
    for(int i=0; i<=n-2; i++)
    {
        en[i] = e[i];
    }
    en[n-1] = 0.0;
    if( ablasmkl.rmatrixbdsvdmkl(d, en, n, isupper, u, nru, c, ncc, vt, ncvt, ref result) )
    {
        return result;
    }

    //
    // Use ALGLIB code: the internal solver indexes D and E from 1, while
    // U, C and VT keep their 0-based layout (start offsets 0).
    //
    double[] d1 = new double[n+1];
    for(int k=1; k<=n; k++)
    {
        d1[k] = d[k-1];
    }
    double[] e1 = new double[0];
    if( n>1 )
    {
        e1 = new double[n];
        for(int k=1; k<=n-1; k++)
        {
            e1[k] = e[k-1];
        }
    }
    result = bidiagonalsvddecompositioninternal(d1, e1, n, isupper, isfractionalaccuracyrequired, u, 0, nru, c, 0, ncc, vt, 0, ncvt);

    // Copy the computed values back into the caller's 0-based array.
    for(int k=0; k<=n-1; k++)
    {
        d[k] = d1[k+1];
    }
    return result;
}


//
// Variant that passes D and E to the internal solver unchanged (the solver
// indexes them from 1) and uses start offset 1 for U, C and VT. E is copied
// first so the caller's array is never modified.
//
public static bool bidiagonalsvddecomposition(ref double[] d,
    double[] e,
    int n,
    bool isupper,
    bool isfractionalaccuracyrequired,
    ref double[,] u,
    int nru,
    ref double[,] c,
    int ncc,
    ref double[,] vt,
    int ncvt)
{
    e = (double[])e.Clone();
    return bidiagonalsvddecompositioninternal(d, e, n, isupper, isfractionalaccuracyrequired, u, 1, nru, c, 1, ncc, vt, 1, ncvt);
}


/*************************************************************************
Internal working subroutine for bidiagonal decomposition
*************************************************************************/
private static bool bidiagonalsvddecompositioninternal(double[] d, double[] e, int n, bool isupper, bool isfractionalaccuracyrequired, double[,] uu, int ustart, int nru, double[,] c, int cstart, int ncc, double[,] vt, int vstart, int ncvt) { bool result = new bool(); int i = 0; int idir = 
0; int isub = 0; int iter = 0; int j = 0; int ll = 0; int lll = 0; int m = 0; int maxit = 0; int oldll = 0; int oldm = 0; double abse = 0; double abss = 0; double cosl = 0; double cosr = 0; double cs = 0; double eps = 0; double f = 0; double g = 0; double h = 0; double mu = 0; double oldcs = 0; double oldsn = 0; double r = 0; double shift = 0; double sigmn = 0; double sigmx = 0; double sinl = 0; double sinr = 0; double sll = 0; double smax = 0; double smin = 0; double sminl = 0; double sminoa = 0; double sn = 0; double thresh = 0; double tol = 0; double tolmul = 0; double unfl = 0; double[] work0 = new double[0]; double[] work1 = new double[0]; double[] work2 = new double[0]; double[] work3 = new double[0]; int maxitr = 0; bool matrixsplitflag = new bool(); bool iterflag = new bool(); double[] utemp = new double[0]; double[] vttemp = new double[0]; double[] ctemp = new double[0]; double[] etemp = new double[0]; double[,] ut = new double[0,0]; bool fwddir = new bool(); double tmp = 0; int mm1 = 0; int mm0 = 0; bool bchangedir = new bool(); int uend = 0; int cend = 0; int vend = 0; int i_ = 0; e = (double[])e.Clone(); result = true; if( n==0 ) { return result; } if( n==1 ) { if( (double)(d[1])<(double)(0) ) { d[1] = -d[1]; if( ncvt>0 ) { for(i_=vstart; i_<=vstart+ncvt-1;i_++) { vt[vstart,i_] = -1*vt[vstart,i_]; } } } return result; } // // these initializers are not really necessary, // but without them compiler complains about uninitialized locals // ll = 0; oldsn = 0; // // init // work0 = new double[n-1+1]; work1 = new double[n-1+1]; work2 = new double[n-1+1]; work3 = new double[n-1+1]; uend = ustart+Math.Max(nru-1, 0); vend = vstart+Math.Max(ncvt-1, 0); cend = cstart+Math.Max(ncc-1, 0); utemp = new double[uend+1]; vttemp = new double[vend+1]; ctemp = new double[cend+1]; maxitr = 12; fwddir = true; if( nru>0 ) { ut = new double[ustart+n, ustart+nru]; ablas.rmatrixtranspose(nru, n, uu, ustart, ustart, ut, ustart, ustart); } // // resize E from N-1 to N // etemp = 
new double[n+1]; for(i=1; i<=n-1; i++) { etemp[i] = e[i]; } e = new double[n+1]; for(i=1; i<=n-1; i++) { e[i] = etemp[i]; } e[n] = 0; idir = 0; // // Get machine constants // eps = math.machineepsilon; unfl = math.minrealnumber; // // If matrix lower bidiagonal, rotate to be upper bidiagonal // by applying Givens rotations on the left // if( !isupper ) { for(i=1; i<=n-1; i++) { rotations.generaterotation(d[i], e[i], ref cs, ref sn, ref r); d[i] = r; e[i] = sn*d[i+1]; d[i+1] = cs*d[i+1]; work0[i] = cs; work1[i] = sn; } // // Update singular vectors if desired // if( nru>0 ) { rotations.applyrotationsfromtheleft(fwddir, 1+ustart-1, n+ustart-1, ustart, uend, work0, work1, ut, utemp); } if( ncc>0 ) { rotations.applyrotationsfromtheleft(fwddir, 1+cstart-1, n+cstart-1, cstart, cend, work0, work1, c, ctemp); } } // // Compute singular values to relative accuracy TOL // (By setting TOL to be negative, algorithm will compute // singular values to absolute accuracy ABS(TOL)*norm(input matrix)) // tolmul = Math.Max(10, Math.Min(100, Math.Pow(eps, -0.125))); tol = tolmul*eps; // // Compute approximate maximum, minimum singular values // smax = 0; for(i=1; i<=n; i++) { smax = Math.Max(smax, Math.Abs(d[i])); } for(i=1; i<=n-1; i++) { smax = Math.Max(smax, Math.Abs(e[i])); } sminl = 0; if( (double)(tol)>=(double)(0) ) { // // Relative accuracy desired // sminoa = Math.Abs(d[1]); if( (double)(sminoa)!=(double)(0) ) { mu = sminoa; for(i=2; i<=n; i++) { mu = Math.Abs(d[i])*(mu/(mu+Math.Abs(e[i-1]))); sminoa = Math.Min(sminoa, mu); if( (double)(sminoa)==(double)(0) ) { break; } } } sminoa = sminoa/Math.Sqrt(n); thresh = Math.Max(tol*sminoa, maxitr*n*n*unfl); } else { // // Absolute accuracy desired // thresh = Math.Max(Math.Abs(tol)*smax, maxitr*n*n*unfl); } // // Prepare for main iteration loop for the singular values // (MAXIT is the maximum number of passes through the inner // loop permitted before nonconvergence signalled.) 
// maxit = maxitr*n*n; iter = 0; oldll = -1; oldm = -1; // // M points to last element of unconverged part of matrix // m = n; // // Begin main iteration loop // while( true ) { // // Check for convergence or exceeding iteration count // if( m<=1 ) { break; } if( iter>maxit ) { result = false; return result; } // // Find diagonal block of matrix to work on // if( (double)(tol)<(double)(0) && (double)(Math.Abs(d[m]))<=(double)(thresh) ) { d[m] = 0; } smax = Math.Abs(d[m]); smin = smax; matrixsplitflag = false; for(lll=1; lll<=m-1; lll++) { ll = m-lll; abss = Math.Abs(d[ll]); abse = Math.Abs(e[ll]); if( (double)(tol)<(double)(0) && (double)(abss)<=(double)(thresh) ) { d[ll] = 0; } if( (double)(abse)<=(double)(thresh) ) { matrixsplitflag = true; break; } smin = Math.Min(smin, abss); smax = Math.Max(smax, Math.Max(abss, abse)); } if( !matrixsplitflag ) { ll = 0; } else { // // Matrix splits since E(LL) = 0 // e[ll] = 0; if( ll==m-1 ) { // // Convergence of bottom singular value, return to top of loop // m = m-1; continue; } } ll = ll+1; // // E(LL) through E(M-1) are nonzero, E(LL-1) is zero // if( ll==m-1 ) { // // 2 by 2 block, handle separately // svdv2x2(d[m-1], e[m-1], d[m], ref sigmn, ref sigmx, ref sinr, ref cosr, ref sinl, ref cosl); d[m-1] = sigmx; e[m-1] = 0; d[m] = sigmn; // // Compute singular vectors, if desired // if( ncvt>0 ) { mm0 = m+(vstart-1); mm1 = m-1+(vstart-1); for(i_=vstart; i_<=vend;i_++) { vttemp[i_] = cosr*vt[mm1,i_]; } for(i_=vstart; i_<=vend;i_++) { vttemp[i_] = vttemp[i_] + sinr*vt[mm0,i_]; } for(i_=vstart; i_<=vend;i_++) { vt[mm0,i_] = cosr*vt[mm0,i_]; } for(i_=vstart; i_<=vend;i_++) { vt[mm0,i_] = vt[mm0,i_] - sinr*vt[mm1,i_]; } for(i_=vstart; i_<=vend;i_++) { vt[mm1,i_] = vttemp[i_]; } } if( nru>0 ) { mm0 = m+ustart-1; mm1 = m-1+ustart-1; for(i_=ustart; i_<=uend;i_++) { utemp[i_] = cosl*ut[mm1,i_]; } for(i_=ustart; i_<=uend;i_++) { utemp[i_] = utemp[i_] + sinl*ut[mm0,i_]; } for(i_=ustart; i_<=uend;i_++) { ut[mm0,i_] = cosl*ut[mm0,i_]; } 
for(i_=ustart; i_<=uend;i_++) { ut[mm0,i_] = ut[mm0,i_] - sinl*ut[mm1,i_]; } for(i_=ustart; i_<=uend;i_++) { ut[mm1,i_] = utemp[i_]; } } if( ncc>0 ) { mm0 = m+cstart-1; mm1 = m-1+cstart-1; for(i_=cstart; i_<=cend;i_++) { ctemp[i_] = cosl*c[mm1,i_]; } for(i_=cstart; i_<=cend;i_++) { ctemp[i_] = ctemp[i_] + sinl*c[mm0,i_]; } for(i_=cstart; i_<=cend;i_++) { c[mm0,i_] = cosl*c[mm0,i_]; } for(i_=cstart; i_<=cend;i_++) { c[mm0,i_] = c[mm0,i_] - sinl*c[mm1,i_]; } for(i_=cstart; i_<=cend;i_++) { c[mm1,i_] = ctemp[i_]; } } m = m-2; continue; } // // If working on new submatrix, choose shift direction // (from larger end diagonal element towards smaller) // // Previously was // "if (LL>OLDM) or (M // Very strange that LAPACK still contains it. // bchangedir = false; if( idir==1 && (double)(Math.Abs(d[ll]))<(double)(1.0E-3*Math.Abs(d[m])) ) { bchangedir = true; } if( idir==2 && (double)(Math.Abs(d[m]))<(double)(1.0E-3*Math.Abs(d[ll])) ) { bchangedir = true; } if( (ll!=oldll || m!=oldm) || bchangedir ) { if( (double)(Math.Abs(d[ll]))>=(double)(Math.Abs(d[m])) ) { // // Chase bulge from top (big end) to bottom (small end) // idir = 1; } else { // // Chase bulge from bottom (big end) to top (small end) // idir = 2; } } // // Apply convergence tests // if( idir==1 ) { // // Run convergence test in forward direction // First apply standard test to bottom of matrix // if( (double)(Math.Abs(e[m-1]))<=(double)(Math.Abs(tol)*Math.Abs(d[m])) || ((double)(tol)<(double)(0) && (double)(Math.Abs(e[m-1]))<=(double)(thresh)) ) { e[m-1] = 0; continue; } if( (double)(tol)>=(double)(0) ) { // // If relative accuracy desired, // apply convergence criterion forward // mu = Math.Abs(d[ll]); sminl = mu; iterflag = false; for(lll=ll; lll<=m-1; lll++) { if( (double)(Math.Abs(e[lll]))<=(double)(tol*mu) ) { e[lll] = 0; iterflag = true; break; } mu = Math.Abs(d[lll+1])*(mu/(mu+Math.Abs(e[lll]))); sminl = Math.Min(sminl, mu); } if( iterflag ) { continue; } } } else { // // Run convergence test in 
backward direction // First apply standard test to top of matrix // if( (double)(Math.Abs(e[ll]))<=(double)(Math.Abs(tol)*Math.Abs(d[ll])) || ((double)(tol)<(double)(0) && (double)(Math.Abs(e[ll]))<=(double)(thresh)) ) { e[ll] = 0; continue; } if( (double)(tol)>=(double)(0) ) { // // If relative accuracy desired, // apply convergence criterion backward // mu = Math.Abs(d[m]); sminl = mu; iterflag = false; for(lll=m-1; lll>=ll; lll--) { if( (double)(Math.Abs(e[lll]))<=(double)(tol*mu) ) { e[lll] = 0; iterflag = true; break; } mu = Math.Abs(d[lll])*(mu/(mu+Math.Abs(e[lll]))); sminl = Math.Min(sminl, mu); } if( iterflag ) { continue; } } } oldll = ll; oldm = m; // // Compute shift. First, test if shifting would ruin relative // accuracy, and if so set the shift to zero. // if( (double)(tol)>=(double)(0) && (double)(n*tol*(sminl/smax))<=(double)(Math.Max(eps, 0.01*tol)) ) { // // Use a zero shift to avoid loss of relative accuracy // shift = 0; } else { // // Compute the shift from 2-by-2 block at end of matrix // if( idir==1 ) { sll = Math.Abs(d[ll]); svd2x2(d[m-1], e[m-1], d[m], ref shift, ref r); } else { sll = Math.Abs(d[m]); svd2x2(d[ll], e[ll], d[ll+1], ref shift, ref r); } // // Test if shift negligible, and if so set to zero // if( (double)(sll)>(double)(0) ) { if( (double)(math.sqr(shift/sll))<(double)(eps) ) { shift = 0; } } } // // Increment iteration count // iter = iter+m-ll; // // If SHIFT = 0, do simplified QR iteration // if( (double)(shift)==(double)(0) ) { if( idir==1 ) { // // Chase bulge from top to bottom // Save cosines and sines for later singular vector updates // cs = 1; oldcs = 1; for(i=ll; i<=m-1; i++) { rotations.generaterotation(d[i]*cs, e[i], ref cs, ref sn, ref r); if( i>ll ) { e[i-1] = oldsn*r; } rotations.generaterotation(oldcs*r, d[i+1]*sn, ref oldcs, ref oldsn, ref tmp); d[i] = tmp; work0[i-ll+1] = cs; work1[i-ll+1] = sn; work2[i-ll+1] = oldcs; work3[i-ll+1] = oldsn; } h = d[m]*cs; d[m] = h*oldcs; e[m-1] = h*oldsn; // // Update 
singular vectors // if( ncvt>0 ) { rotations.applyrotationsfromtheleft(fwddir, ll+vstart-1, m+vstart-1, vstart, vend, work0, work1, vt, vttemp); } if( nru>0 ) { rotations.applyrotationsfromtheleft(fwddir, ll+ustart-1, m+ustart-1, ustart, uend, work2, work3, ut, utemp); } if( ncc>0 ) { rotations.applyrotationsfromtheleft(fwddir, ll+cstart-1, m+cstart-1, cstart, cend, work2, work3, c, ctemp); } // // Test convergence // if( (double)(Math.Abs(e[m-1]))<=(double)(thresh) ) { e[m-1] = 0; } } else { // // Chase bulge from bottom to top // Save cosines and sines for later singular vector updates // cs = 1; oldcs = 1; for(i=m; i>=ll+1; i--) { rotations.generaterotation(d[i]*cs, e[i-1], ref cs, ref sn, ref r); if( i0 ) { rotations.applyrotationsfromtheleft(!fwddir, ll+vstart-1, m+vstart-1, vstart, vend, work2, work3, vt, vttemp); } if( nru>0 ) { rotations.applyrotationsfromtheleft(!fwddir, ll+ustart-1, m+ustart-1, ustart, uend, work0, work1, ut, utemp); } if( ncc>0 ) { rotations.applyrotationsfromtheleft(!fwddir, ll+cstart-1, m+cstart-1, cstart, cend, work0, work1, c, ctemp); } // // Test convergence // if( (double)(Math.Abs(e[ll]))<=(double)(thresh) ) { e[ll] = 0; } } } else { // // Use nonzero shift // if( idir==1 ) { // // Chase bulge from top to bottom // Save cosines and sines for later singular vector updates // f = (Math.Abs(d[ll])-shift)*(extsignbdsqr(1, d[ll])+shift/d[ll]); g = e[ll]; for(i=ll; i<=m-1; i++) { rotations.generaterotation(f, g, ref cosr, ref sinr, ref r); if( i>ll ) { e[i-1] = r; } f = cosr*d[i]+sinr*e[i]; e[i] = cosr*e[i]-sinr*d[i]; g = sinr*d[i+1]; d[i+1] = cosr*d[i+1]; rotations.generaterotation(f, g, ref cosl, ref sinl, ref r); d[i] = r; f = cosl*e[i]+sinl*d[i+1]; d[i+1] = cosl*d[i+1]-sinl*e[i]; if( i0 ) { rotations.applyrotationsfromtheleft(fwddir, ll+vstart-1, m+vstart-1, vstart, vend, work0, work1, vt, vttemp); } if( nru>0 ) { rotations.applyrotationsfromtheleft(fwddir, ll+ustart-1, m+ustart-1, ustart, uend, work2, work3, ut, utemp); } if( ncc>0 
) { rotations.applyrotationsfromtheleft(fwddir, ll+cstart-1, m+cstart-1, cstart, cend, work2, work3, c, ctemp); } // // Test convergence // if( (double)(Math.Abs(e[m-1]))<=(double)(thresh) ) { e[m-1] = 0; } } else { // // Chase bulge from bottom to top // Save cosines and sines for later singular vector updates // f = (Math.Abs(d[m])-shift)*(extsignbdsqr(1, d[m])+shift/d[m]); g = e[m-1]; for(i=m; i>=ll+1; i--) { rotations.generaterotation(f, g, ref cosr, ref sinr, ref r); if( ill+1 ) { g = sinl*e[i-2]; e[i-2] = cosl*e[i-2]; } work0[i-ll] = cosr; work1[i-ll] = -sinr; work2[i-ll] = cosl; work3[i-ll] = -sinl; } e[ll] = f; // // Test convergence // if( (double)(Math.Abs(e[ll]))<=(double)(thresh) ) { e[ll] = 0; } // // Update singular vectors if desired // if( ncvt>0 ) { rotations.applyrotationsfromtheleft(!fwddir, ll+vstart-1, m+vstart-1, vstart, vend, work2, work3, vt, vttemp); } if( nru>0 ) { rotations.applyrotationsfromtheleft(!fwddir, ll+ustart-1, m+ustart-1, ustart, uend, work0, work1, ut, utemp); } if( ncc>0 ) { rotations.applyrotationsfromtheleft(!fwddir, ll+cstart-1, m+cstart-1, cstart, cend, work0, work1, c, ctemp); } } } // // QR iteration finished, go back and check convergence // continue; } // // All singular values converged, so make them positive // for(i=1; i<=n; i++) { if( (double)(d[i])<(double)(0) ) { d[i] = -d[i]; // // Change sign of singular vectors, if desired // if( ncvt>0 ) { for(i_=vstart; i_<=vend;i_++) { vt[i+vstart-1,i_] = -1*vt[i+vstart-1,i_]; } } } } // // Sort the singular values into decreasing order (insertion sort on // singular values, but only one transposition per singular vector) // for(i=1; i<=n-1; i++) { // // Scan for smallest D(I) // isub = 1; smin = d[1]; for(j=2; j<=n+1-i; j++) { if( (double)(d[j])<=(double)(smin) ) { isub = j; smin = d[j]; } } if( isub!=n+1-i ) { // // Swap singular values and vectors // d[isub] = d[n+1-i]; d[n+1-i] = smin; if( ncvt>0 ) { j = n+1-i; for(i_=vstart; i_<=vend;i_++) { vttemp[i_] = 
vt[isub+vstart-1,i_];
                        }
                        for(i_=vstart; i_<=vend;i_++)
                        {
                            vt[isub+vstart-1,i_] = vt[j+vstart-1,i_];
                        }
                        for(i_=vstart; i_<=vend;i_++)
                        {
                            vt[j+vstart-1,i_] = vttemp[i_];
                        }
                    }
                    if( nru>0 )
                    {
                        j = n+1-i;
                        for(i_=ustart; i_<=uend;i_++)
                        {
                            utemp[i_] = ut[isub+ustart-1,i_];
                        }
                        for(i_=ustart; i_<=uend;i_++)
                        {
                            ut[isub+ustart-1,i_] = ut[j+ustart-1,i_];
                        }
                        for(i_=ustart; i_<=uend;i_++)
                        {
                            ut[j+ustart-1,i_] = utemp[i_];
                        }
                    }
                    if( ncc>0 )
                    {
                        j = n+1-i;
                        for(i_=cstart; i_<=cend;i_++)
                        {
                            ctemp[i_] = c[isub+cstart-1,i_];
                        }
                        for(i_=cstart; i_<=cend;i_++)
                        {
                            c[isub+cstart-1,i_] = c[j+cstart-1,i_];
                        }
                        for(i_=cstart; i_<=cend;i_++)
                        {
                            c[j+cstart-1,i_] = ctemp[i_];
                        }
                    }
                }
            }

            //
            // Copy U back from temporary storage
            //
            if( nru>0 )
            {
                ablas.rmatrixtranspose(n, nru, ut, ustart, ustart, uu, ustart, ustart);
            }
            return result;
        }


        /*************************************************************************
        Sign transfer: returns |A| when B is non-negative (zero included) and
        -|A| otherwise.
        *************************************************************************/
        private static double extsignbdsqr(double a,
            double b)
        {
            double magnitude = Math.Abs(a);
            if( (double)(b)>=(double)(0) )
            {
                return magnitude;
            }
            return -magnitude;
        }


        /*************************************************************************
        Computes the pair (SSMIN, SSMAX) from the three values F, G, H.

        Only the absolute values of F, G and H are used. The branches below are
        arranged so that the ratios which are formed never exceed one.
        NOTE(review): appears to be a port of LAPACK DLAS2 (singular values of
        the 2x2 triangular matrix [F G; 0 H]) - confirm against the reference.
        *************************************************************************/
        private static void svd2x2(double f,
            double g,
            double h,
            ref double ssmin,
            ref double ssmax)
        {
            double sumterm = 0;
            double diffterm = 0;
            double gratiosq = 0;
            double invratio = 0;
            double scale = 0;

            ssmin = 0;
            ssmax = 0;
            double absf = Math.Abs(f);
            double absg = Math.Abs(g);
            double absh = Math.Abs(h);
            double lo = Math.Min(absf, absh);
            double hi = Math.Max(absf, absh);

            //
            // At least one of F, H is exactly zero
            //
            if( (double)(lo)==(double)(0) )
            {
                ssmin = 0;
                if( (double)(hi)==(double)(0) )
                {
                    ssmax = absg;
                }
                else
                {
                    ssmax = Math.Max(hi, absg)*Math.Sqrt(1+math.sqr(Math.Min(hi, absg)/Math.Max(hi, absg)));
                }
                return;
            }

            //
            // |G| is smaller than max(|F|,|H|)
            //
            if( (double)(absg)<(double)(hi) )
            {
                sumterm = 1+lo/hi;
                diffterm = (hi-lo)/hi;
                gratiosq = math.sqr(absg/hi);
                scale = 2/(Math.Sqrt(sumterm*sumterm+gratiosq)+Math.Sqrt(diffterm*diffterm+gratiosq));
                ssmin = lo*scale;
                ssmax = hi/scale;
                return;
            }

            //
            // |G| dominates
            //
            invratio = hi/absg;
            if( (double)(invratio)==(double)(0) )
            {
                //
                // Avoid possible harmful underflow if exponent range
                // asymmetric (true SSMIN may not underflow even if
                // the ratio underflows)
                //
                ssmin = lo*hi/absg;
                ssmax = absg;
                return;
            }
            sumterm = 1+lo/hi;
            diffterm = (hi-lo)/hi;
            scale = 1/(Math.Sqrt(1+math.sqr(sumterm*invratio))+Math.Sqrt(1+math.sqr(diffterm*invratio)));
            ssmin = lo*scale*invratio;
            ssmin = ssmin+ssmin;
            ssmax = absg/(scale+scale);
        }


        /*************************************************************************
        Computes SSMIN, SSMAX together with two rotation pairs (CSL, SNL) and
        (CSR, SNR) from the three values F, G, H.

        The signs of SSMAX and SSMIN are corrected at the end according to
        which of F, G, H has the largest absolute value.
        NOTE(review): appears to be a port of LAPACK DLASV2 (SVD of the 2x2
        triangular matrix [F G; 0 H]) - confirm against the reference.
        *************************************************************************/
        private static void svdv2x2(double f,
            double g,
            double h,
            ref double ssmin,
            ref double ssmax,
            ref double snr,
            ref double csr,
            ref double snl,
            ref double csl)
        {
            double a = 0;
            double d = 0;
            double l = 0;
            double m = 0;
            double mm = 0;
            double r = 0;
            double s = 0;
            double t = 0;
            double tt = 0;
            double v = 0;
            double tmp = 0;

            ssmin = 0;
            ssmax = 0;
            snr = 0;
            csr = 0;
            snl = 0;
            csl = 0;

            double ft = f;
            double fa = Math.Abs(ft);
            double ht = h;
            double ha = Math.Abs(h);

            //
            // Rotation components are zero-initialized so that the compiler
            // sees them as definitely assigned on every path
            //
            double cosleft = 0;
            double cosright = 0;
            double sinleft = 0;
            double sinright = 0;
            double tsign = 0;

            //
            // LARGEST identifies the maximum absolute element of matrix
            // LARGEST = 1 if F largest in absolute values
            // LARGEST = 2 if G largest in absolute values
            // LARGEST = 3 if H largest in absolute values
            //
            int largest = 1;
            bool swapped = (double)(ha)>(double)(fa);
            if( swapped )
            {
                //
                // Exchange F and H so that FA .ge. HA
                //
                largest = 3;
                tmp = ft;
                ft = ht;
                ht = tmp;
                tmp = fa;
                fa = ha;
                ha = tmp;
            }

            double gt = g;
            double ga = Math.Abs(gt);
            if( (double)(ga)==(double)(0) )
            {
                //
                // Diagonal matrix
                //
                ssmin = ha;
                ssmax = fa;
                cosleft = 1;
                cosright = 1;
                sinleft = 0;
                sinright = 0;
            }
            else
            {
                bool gissmall = true;
                if( (double)(ga)>(double)(fa) )
                {
                    largest = 2;
                    if( (double)(fa/ga)<(double)(math.machineepsilon) )
                    {
                        //
                        // Case of very large GA
                        //
                        gissmall = false;
                        ssmax = ga;
                        if( (double)(ha)>(double)(1) )
                        {
                            v = ga/ha;
                            ssmin = fa/v;
                        }
                        else
                        {
                            v = fa/ga;
                            ssmin = v*ha;
                        }
                        cosleft = 1;
                        sinleft = ht/gt;
                        sinright = 1;
                        cosright = ft/gt;
                    }
                }
                if( gissmall )
                {
                    //
                    // Normal case
                    //
                    d = fa-ha;
                    if( (double)(d)==(double)(fa) )
                    {
                        l = 1;
                    }
                    else
                    {
                        l = d/fa;
                    }
                    m = gt/ft;
                    t = 2-l;
                    mm = m*m;
                    tt = t*t;
                    s = Math.Sqrt(tt+mm);
                    if( (double)(l)==(double)(0) )
                    {
                        r = Math.Abs(m);
                    }
                    else
                    {
                        r = Math.Sqrt(l*l+mm);
                    }
                    a = 0.5*(s+r);
                    ssmin = ha/a;
                    ssmax = fa*a;
                    if( (double)(mm)==(double)(0) )
                    {
                        //
                        // Note that M is very tiny
                        //
                        if( (double)(l)==(double)(0) )
                        {
                            t = extsignbdsqr(2, ft)*extsignbdsqr(1, gt);
                        }
                        else
                        {
                            t = gt/extsignbdsqr(d, ft)+m/t;
                        }
                    }
                    else
                    {
                        t = (m/(s+t)+m/(r+l))*(1+a);
                    }
                    l = Math.Sqrt(t*t+4);
                    cosright = 2/l;
                    sinright = t/l;
                    cosleft = (cosright+sinright*m)/a;
                    v = ht/ft;
                    sinleft = v*sinright/a;
                }
            }

            //
            // Undo the F/H exchange when reporting the rotations
            //
            if( swapped )
            {
                csl = sinright;
                snl = cosright;
                csr = sinleft;
                snr = cosleft;
            }
            else
            {
                csl = cosleft;
                snl = sinleft;
                csr = cosright;
                snr = sinright;
            }

            //
            // Correct signs of SSMAX and SSMIN
            //
            switch( largest )
            {
                case 1:
                    tsign = extsignbdsqr(1, csr)*extsignbdsqr(1, csl)*extsignbdsqr(1, f);
                    break;
                case 2:
                    tsign = extsignbdsqr(1, snr)*extsignbdsqr(1, csl)*extsignbdsqr(1, g);
                    break;
                case 3:
                    tsign = extsignbdsqr(1, snr)*extsignbdsqr(1, snl)*extsignbdsqr(1, h);
                    break;
            }
            ssmax = extsignbdsqr(ssmax, tsign);
            ssmin = extsignbdsqr(ssmin, tsign*extsignbdsqr(1, f)*extsignbdsqr(1, h));
        }


    }
    public class svd
    {
        /*************************************************************************
        Singular value decomposition of a rectangular matrix.

        COMMERCIAL EDITION OF ALGLIB:

          ! Commercial version of ALGLIB includes one important improvement of !
this function, which can be used from C++ and C#: ! * Intel MKL support (lightweight Intel MKL is shipped with ALGLIB) ! ! Intel MKL gives approximately constant (with respect to number of ! worker threads) acceleration factor which depends on CPU being used, ! problem size and "baseline" ALGLIB edition which is used for ! comparison. ! ! Generally, commercial ALGLIB is several times faster than open-source ! generic C edition, and many times faster than open-source C# edition. ! ! Multithreaded acceleration is only partially supported (some parts are ! optimized, but most - are not). ! ! We recommend you to read 'Working with commercial version' section of ! ALGLIB Reference Manual in order to find out how to use performance- ! related features provided by commercial edition of ALGLIB. The algorithm calculates the singular value decomposition of a matrix of size MxN: A = U * S * V^T The algorithm finds the singular values and, optionally, matrices U and V^T. The algorithm can find both first min(M,N) columns of matrix U and rows of matrix V^T (singular vectors), and matrices U and V^T wholly (of sizes MxM and NxN respectively). Take into account that the subroutine does not return matrix V but V^T. Input parameters: A - matrix to be decomposed. Array whose indexes range within [0..M-1, 0..N-1]. M - number of rows in matrix A. N - number of columns in matrix A. UNeeded - 0, 1 or 2. See the description of the parameter U. VTNeeded - 0, 1 or 2. See the description of the parameter VT. AdditionalMemory - If the parameter: * equals 0, the algorithm doesn’t use additional memory (lower requirements, lower performance). * equals 1, the algorithm uses additional memory of size min(M,N)*min(M,N) of real numbers. It often speeds up the algorithm. * equals 2, the algorithm uses additional memory of size M*min(M,N) of real numbers. It allows to get a maximum performance. The recommended value of the parameter is 2. 
Output parameters:
            W           -   contains singular values in descending order.
            U           -   if UNeeded=0, U isn't changed, the left singular vectors
                            are not calculated.
                            if Uneeded=1, U contains left singular vectors (first
                            min(M,N) columns of matrix U). Array whose indexes range
                            within [0..M-1, 0..Min(M,N)-1].
                            if UNeeded=2, U contains matrix U wholly. Array whose
                            indexes range within [0..M-1, 0..M-1].
            VT          -   if VTNeeded=0, VT isn’t changed, the right singular vectors
                            are not calculated.
                            if VTNeeded=1, VT contains right singular vectors (first
                            min(M,N) rows of matrix V^T). Array whose indexes range
                            within [0..min(M,N)-1, 0..N-1].
                            if VTNeeded=2, VT contains matrix V^T wholly. Array whose
                            indexes range within [0..N-1, 0..N-1].

          -- ALGLIB --
             Copyright 2005 by Bochkanov Sergey
        *************************************************************************/
        public static bool rmatrixsvd(double[,] a,
            int m,
            int n,
            int uneeded,
            int vtneeded,
            int additionalmemory,
            ref double[] w,
            ref double[,] u,
            ref double[,] vt)
        {
            bool converged = true;
            double[] tauq = new double[0];
            double[] taup = new double[0];
            double[] tau = new double[0];
            double[] e = new double[0];
            double[] work = new double[0];
            double[,] t2 = new double[0,0];
            bool isupper = new bool();
            int nru = 0;
            int ncu = 0;
            int nrvt = 0;
            int ncvt = 0;

            //
            // Work on a private copy of A; outputs start out empty
            //
            a = (double[,])a.Clone();
            w = new double[0];
            u = new double[0,0];
            vt = new double[0,0];

            //
            // Degenerate problem: nothing to decompose
            //
            if( m==0 || n==0 )
            {
                return converged;
            }
            alglib.ap.assert(uneeded>=0 && uneeded<=2, "SVDDecomposition: wrong parameters!");
            alglib.ap.assert(vtneeded>=0 && vtneeded<=2, "SVDDecomposition: wrong parameters!");
            alglib.ap.assert(additionalmemory>=0 && additionalmemory<=2, "SVDDecomposition: wrong parameters!");

            //
            // Output sizes
            //
            int minmn = Math.Min(m, n);
            w = new double[minmn+1];
            switch( uneeded )
            {
                case 1:
                    nru = m;
                    ncu = minmn;
                    u = new double[nru, ncu];
                    break;
                case 2:
                    nru = m;
                    ncu = m;
                    u = new double[nru, ncu];
                    break;
            }
            switch( vtneeded )
            {
                case 1:
                    nrvt = minmn;
                    ncvt = n;
                    vt = new double[nrvt, ncvt];
                    break;
                case 2:
                    nrvt = n;
                    ncvt = n;
                    vt = new double[nrvt, ncvt];
                    break;
            }

            //
            // M much larger than N
            // Use bidiagonal reduction with QR-decomposition
            //
            if( (double)(m)>(double)(1.6*n) )
            {
                bool wantu = uneeded!=0;

                ortfac.rmatrixqr(ref a, m, n, ref tau);
                if( wantu )
                {
                    //
                    // Left singular vectors (may be full matrix U) to be computed
                    //
                    ortfac.rmatrixqrunpackq(a, m, n, tau, ncu, ref u);
                }

                //
                // Clear everything below the diagonal of the leading NxN part
                //
                for(int row=0; row<n; row++)
                {
                    for(int col=0; col<row; col++)
                    {
                        a[row,col] = 0;
                    }
                }
                ortfac.rmatrixbd(ref a, n, n, ref tauq, ref taup);
                ortfac.rmatrixbdunpackpt(a, n, n, taup, nrvt, ref vt);
                ortfac.rmatrixbdunpackdiagonals(a, n, n, ref isupper, ref w, ref e);
                if( !wantu )
                {
                    //
                    // No left singular vectors to be computed
                    //
                    converged = bdsvd.rmatrixbdsvd(ref w, e, n, isupper, false, ref u, 0, ref a, 0, ref vt, ncvt);
                }
                else if( additionalmemory<1 )
                {
                    //
                    // No additional memory can be used
                    //
                    ortfac.rmatrixbdmultiplybyq(a, n, n, tauq, ref u, m, n, true, false);
                    converged = bdsvd.rmatrixbdsvd(ref w, e, n, isupper, false, ref u, m, ref a, 0, ref vt, ncvt);
                }
                else
                {
                    //
                    // Large U. Transforming intermediate matrix T2
                    //
                    work = new double[Math.Max(m, n)+1];
                    ortfac.rmatrixbdunpackq(a, n, n, tauq, n, ref t2);
                    blas.copymatrix(u, 0, m-1, 0, n-1, ref a, 0, m-1, 0, n-1);
                    blas.inplacetranspose(ref t2, 0, n-1, 0, n-1, ref work);
                    converged = bdsvd.rmatrixbdsvd(ref w, e, n, isupper, false, ref u, 0, ref t2, n, ref vt, ncvt);
                    ablas.rmatrixgemm(m, n, n, 1.0, a, 0, 0, 0, t2, 0, 0, 1, 0.0, u, 0, 0);
                }
                return converged;
            }

            //
            // N much larger than M
            // Use bidiagonal reduction with LQ-decomposition
            //
            if( (double)(n)>(double)(1.6*m) )
            {
                bool wantvt = vtneeded!=0;

                ortfac.rmatrixlq(ref a, m, n, ref tau);
                if( wantvt )
                {
                    //
                    // Right singular vectors (may be full matrix VT) to be computed
                    //
                    ortfac.rmatrixlqunpackq(a, m, n, tau, nrvt, ref vt);
                }

                //
                // Clear everything above the diagonal of the leading MxM part
                //
                for(int row=0; row<m; row++)
                {
                    for(int col=row+1; col<m; col++)
                    {
                        a[row,col] = 0;
                    }
                }
                ortfac.rmatrixbd(ref a, m, m, ref tauq, ref taup);
                ortfac.rmatrixbdunpackq(a, m, m, tauq, ncu, ref u);
                ortfac.rmatrixbdunpackdiagonals(a, m, m, ref isupper, ref w, ref e);

                //
                // Scratch buffer: M+1 entries when VT is not requested,
                // Max(M,N)+1 entries otherwise
                //
                work = new double[(wantvt ? Math.Max(m, n) : m)+1];
                blas.inplacetranspose(ref u, 0, nru-1, 0, ncu-1, ref work);
                if( !wantvt )
                {
                    //
                    // No right singular vectors to be computed
                    //
                    converged = bdsvd.rmatrixbdsvd(ref w, e, m, isupper, false, ref a, 0, ref u, nru, ref vt, 0);
                }
                else if( additionalmemory<1 )
                {
                    //
                    // No additional memory available
                    //
                    ortfac.rmatrixbdmultiplybyp(a, m, m, taup, ref vt, m, n, false, true);
                    converged = bdsvd.rmatrixbdsvd(ref w, e, m, isupper, false, ref a, 0, ref u, nru, ref vt, n);
                }
                else
                {
                    //
                    // Large VT. Transforming intermediate matrix T2
                    //
                    ortfac.rmatrixbdunpackpt(a, m, m, taup, m, ref t2);
                    converged = bdsvd.rmatrixbdsvd(ref w, e, m, isupper, false, ref a, 0, ref u, nru, ref t2, m);
                    blas.copymatrix(vt, 0, m-1, 0, n-1, ref a, 0, m-1, 0, n-1);
                    ablas.rmatrixgemm(m, n, m, 1.0, t2, 0, 0, 0, a, 0, 0, 0, 0.0, vt, 0, 0);
                }
                blas.inplacetranspose(ref u, 0, nru-1, 0, ncu-1, ref work);
                return converged;
            }

            //
            // Remaining cases share the plain bidiagonal reduction of A
            //
            ortfac.rmatrixbd(ref a, m, n, ref tauq, ref taup);
            ortfac.rmatrixbdunpackq(a, m, n, tauq, ncu, ref u);
            ortfac.rmatrixbdunpackpt(a, m, n, taup, nrvt, ref vt);
            ortfac.rmatrixbdunpackdiagonals(a, m, n, ref isupper, ref w, ref e);

            //
            // M<=N
            // We can use inplace transposition of U to get rid of columnwise operations
            //
            if( m<=n )
            {
                work = new double[m+1];
                blas.inplacetranspose(ref u, 0, nru-1, 0, ncu-1, ref work);
                converged = bdsvd.rmatrixbdsvd(ref w, e, minmn, isupper, false, ref a, 0, ref u, nru, ref vt, ncvt);
                blas.inplacetranspose(ref u, 0, nru-1, 0, ncu-1, ref work);
                return converged;
            }

            //
            // Simple bidiagonal reduction (M>N)
            //
            if( additionalmemory<2 || uneeded==0 )
            {
                //
                // We can't use additional memory or there is no need in such operations
                //
                converged = bdsvd.rmatrixbdsvd(ref w, e, minmn, isupper, false, ref u, nru, ref a, 0, ref vt, ncvt);
            }
            else
            {
                //
                // We can use additional memory: run the solver on a transposed copy of U
                //
                t2 = new double[minmn, m];
                blas.copyandtranspose(u, 0, m-1, 0, minmn-1, ref t2, 0, minmn-1, 0, m-1);
                converged = bdsvd.rmatrixbdsvd(ref w, e, minmn, isupper, false, ref u, 0, ref t2, m, ref vt, ncvt);
                blas.copyandtranspose(t2, 0, minmn-1, 0, m-1, ref u, 0, m-1, 0, minmn-1);
            }
            return converged;
        }


        /*************************************************************************
        Single-threaded stub. HPC ALGLIB replaces it by multithreaded code.
*************************************************************************/
        public static bool _pexec_rmatrixsvd(double[,] a,
            int m,
            int n,
            int uneeded,
            int vtneeded,
            int additionalmemory,
            ref double[] w,
            ref double[,] u,
            ref double[,] vt)
        {
            // Forwards every argument unchanged to the serial implementation.
            bool converged = rmatrixsvd(a, m, n, uneeded, vtneeded, additionalmemory, ref w, ref u, ref vt);
            return converged;
        }


    }
    public class evd
    {
        /*************************************************************************
        Finding the eigenvalues and eigenvectors of a symmetric matrix

        The algorithm finds eigen pairs of a symmetric matrix by reducing it to
        tridiagonal form and using the QL/QR algorithm.

        COMMERCIAL EDITION OF ALGLIB:

          ! Commercial version of ALGLIB includes one important improvement of
          ! this function, which can be used from C++ and C#:
          ! * Intel MKL support (lightweight Intel MKL is shipped with ALGLIB)
          !
          ! Intel MKL gives approximately constant (with respect to number of
          ! worker threads) acceleration factor which depends on CPU being used,
          ! problem size and "baseline" ALGLIB edition which is used for
          ! comparison.
          !
          ! Generally, commercial ALGLIB is several times faster than open-source
          ! generic C edition, and many times faster than open-source C# edition.
          !
          ! Multithreaded acceleration is NOT supported for this function.
          !
          ! We recommend you to read 'Working with commercial version' section of
          ! ALGLIB Reference Manual in order to find out how to use performance-
          ! related features provided by commercial edition of ALGLIB.

        Input parameters:
            A       -   symmetric matrix which is given by its upper or lower
                        triangular part.
                        Array whose indexes range within [0..N-1, 0..N-1].
            N       -   size of matrix A.
            ZNeeded -   flag controlling whether the eigenvectors are needed or not.
                        If ZNeeded is equal to:
                         * 0, the eigenvectors are not returned;
                         * 1, the eigenvectors are returned.
            IsUpper -   storage format.

        Output parameters:
            D       -   eigenvalues in ascending order.
                        Array whose index ranges within [0..N-1].
Z       -   if ZNeeded is equal to:
                         * 0, Z hasn’t changed;
                         * 1, Z contains the eigenvectors.
                        Array whose indexes range within [0..N-1, 0..N-1].
                        The eigenvectors are stored in the matrix columns.

        Result:
            True, if the algorithm has converged.
            False, if the algorithm hasn't converged (rare case).

          -- ALGLIB --
             Copyright 2005-2008 by Bochkanov Sergey
        *************************************************************************/
        public static bool smatrixevd(double[,] a,
            int n,
            int zneeded,
            bool isupper,
            ref double[] d,
            ref double[,] z)
        {
            double[] reflectors = new double[0];
            double[] offdiag = new double[0];

            // The reduction below overwrites its matrix argument, so use a copy.
            a = (double[,])a.Clone();
            d = new double[0];
            z = new double[0,0];
            alglib.ap.assert(zneeded==0 || zneeded==1, "SMatrixEVD: incorrect ZNeeded");

            // Step 1: reduce to tridiagonal form (diagonal in d, off-diagonal in offdiag).
            ortfac.smatrixtd(ref a, n, isupper, ref reflectors, ref d, ref offdiag);

            // Step 2: unpack the transformation matrix into z only when eigenvectors are requested.
            if( zneeded==1 )
            {
                ortfac.smatrixtdunpackq(a, n, isupper, reflectors, ref z);
            }

            // Step 3: solve the tridiagonal problem; its status is the result.
            return smatrixtdevd(ref d, offdiag, n, zneeded, ref z);
        }


        /*************************************************************************
        Subroutine for finding the eigenvalues (and eigenvectors) of a symmetric
        matrix in a given half open interval (A, B] by using a bisection and
        inverse iteration

        Input parameters:
            A       -   symmetric matrix which is given by its upper or lower
                        triangular part. Array [0..N-1, 0..N-1].
            N       -   size of matrix A.
            ZNeeded -   flag controlling whether the eigenvectors are needed or not.
                        If ZNeeded is equal to:
                         * 0, the eigenvectors are not returned;
                         * 1, the eigenvectors are returned.
            IsUpperA -  storage format of matrix A.
            B1, B2 -    half open interval (B1, B2] to search eigenvalues in.

        Output parameters:
            M       -   number of eigenvalues found in a given half-interval (M>=0).
            W       -   array of the eigenvalues found.
                        Array whose index ranges within [0..M-1].
            Z       -   if ZNeeded is equal to:
                         * 0, Z hasn’t changed;
                         * 1, Z contains eigenvectors.
                        Array whose indexes range within [0..N-1, 0..M-1].
                        The eigenvectors are stored in the matrix columns.

        Result:
            True, if successful.
M contains the number of eigenvalues in the given
            half-interval (could be equal to 0), W contains the eigenvalues,
            Z contains the eigenvectors (if needed).

            False, if the bisection method subroutine wasn't able to find the
            eigenvalues in the given interval or if the inverse iteration subroutine
            wasn't able to find all the corresponding eigenvectors.
            In that case, the eigenvalues and eigenvectors are not returned,
            M is equal to 0.

          -- ALGLIB --
             Copyright 07.01.2006 by Bochkanov Sergey
        *************************************************************************/
        public static bool smatrixevdr(double[,] a,
            int n,
            int zneeded,
            bool isupper,
            double b1,
            double b2,
            ref int m,
            ref double[] w,
            ref double[,] z)
        {
            bool result = new bool();
            double[] tau = new double[0];
            double[] e = new double[0];

            // The reduction below overwrites its matrix argument, so use a copy.
            a = (double[,])a.Clone();
            m = 0;
            w = new double[0];
            z = new double[0,0];

            // The message names this routine, matching the sibling routines
            // smatrixevd / smatrixevdi (it previously said "SMatrixTDEVDR").
            alglib.ap.assert(zneeded==0 || zneeded==1, "SMatrixEVDR: incorrect ZNeeded");

            // Reduce to tridiagonal form: diagonal goes to w, off-diagonal to e.
            ortfac.smatrixtd(ref a, n, isupper, ref tau, ref w, ref e);
            if( zneeded==1 )
            {
                // Eigenvectors requested: unpack the transformation matrix into z.
                ortfac.smatrixtdunpackq(a, n, isupper, tau, ref z);
            }

            // The tridiagonal interval solver does the rest; its status is returned as-is.
            result = smatrixtdevdr(ref w, e, n, zneeded, b1, b2, ref m, ref z);
            return result;
        }


        /*************************************************************************
        Subroutine for finding the eigenvalues and eigenvectors of a symmetric
        matrix with given indexes by using bisection and inverse iteration methods.

        Input parameters:
            A       -   symmetric matrix which is given by its upper or lower
                        triangular part. Array whose indexes range within [0..N-1, 0..N-1].
            N       -   size of matrix A.
            ZNeeded -   flag controlling whether the eigenvectors are needed or not.
                        If ZNeeded is equal to:
                         * 0, the eigenvectors are not returned;
                         * 1, the eigenvectors are returned.
            IsUpperA -  storage format of matrix A.
            I1, I2 -    index interval for searching (from I1 to I2).
                        0 <= I1 <= I2 <= N-1.

        Output parameters:
            W       -   array of the eigenvalues found.
                        Array whose index ranges within [0..I2-I1].
Z       -   if ZNeeded is equal to:
                         * 0, Z hasn’t changed;
                         * 1, Z contains eigenvectors.
                        Array whose indexes range within [0..N-1, 0..I2-I1].
                        In that case, the eigenvectors are stored in the matrix columns.

        Result:
            True, if successful. W contains the eigenvalues, Z contains the
            eigenvectors (if needed).

            False, if the bisection method subroutine wasn't able to find the
            eigenvalues in the given interval or if the inverse iteration subroutine
            wasn't able to find all the corresponding eigenvectors.
            In that case, the eigenvalues and eigenvectors are not returned.

          -- ALGLIB --
             Copyright 07.01.2006 by Bochkanov Sergey
        *************************************************************************/
        public static bool smatrixevdi(double[,] a,
            int n,
            int zneeded,
            bool isupper,
            int i1,
            int i2,
            ref double[] w,
            ref double[,] z)
        {
            double[] reflectors = new double[0];
            double[] offdiag = new double[0];

            // The reduction below overwrites its matrix argument, so use a copy.
            a = (double[,])a.Clone();
            w = new double[0];
            z = new double[0,0];
            alglib.ap.assert(zneeded==0 || zneeded==1, "SMatrixEVDI: incorrect ZNeeded");

            // Tridiagonal reduction: diagonal goes to w, off-diagonal to offdiag.
            ortfac.smatrixtd(ref a, n, isupper, ref reflectors, ref w, ref offdiag);

            // The transformation matrix is unpacked into z only when eigenvectors are requested.
            if( zneeded==1 )
            {
                ortfac.smatrixtdunpackq(a, n, isupper, reflectors, ref z);
            }

            // The index-range solver for the tridiagonal problem determines the result.
            return smatrixtdevdi(ref w, offdiag, n, zneeded, i1, i2, ref z);
        }


        /*************************************************************************
        Finding the eigenvalues and eigenvectors of a Hermitian matrix

        The algorithm finds eigen pairs of a Hermitian matrix by reducing it to
        real tridiagonal form and using the QL/QR algorithm.

        COMMERCIAL EDITION OF ALGLIB:

          ! Commercial version of ALGLIB includes one important improvement of
          ! this function, which can be used from C++ and C#:
          ! * Intel MKL support (lightweight Intel MKL is shipped with ALGLIB)
          !
          ! Intel MKL gives approximately constant (with respect to number of
          ! worker threads) acceleration factor which depends on CPU being used,
          ! problem size and "baseline" ALGLIB edition which is used for
          ! comparison.
          !
          !
Generally, commercial ALGLIB is several times faster than open-source ! generic C edition, and many times faster than open-source C# edition. ! ! Multithreaded acceleration is NOT supported for this function. ! ! We recommend you to read 'Working with commercial version' section of ! ALGLIB Reference Manual in order to find out how to use performance- ! related features provided by commercial edition of ALGLIB. Input parameters: A - Hermitian matrix which is given by its upper or lower triangular part. Array whose indexes range within [0..N-1, 0..N-1]. N - size of matrix A. IsUpper - storage format. ZNeeded - flag controlling whether the eigenvectors are needed or not. If ZNeeded is equal to: * 0, the eigenvectors are not returned; * 1, the eigenvectors are returned. Output parameters: D - eigenvalues in ascending order. Array whose index ranges within [0..N-1]. Z - if ZNeeded is equal to: * 0, Z hasn’t changed; * 1, Z contains the eigenvectors. Array whose indexes range within [0..N-1, 0..N-1]. The eigenvectors are stored in the matrix columns. Result: True, if the algorithm has converged. False, if the algorithm hasn't converged (rare case). Note: eigenvectors of Hermitian matrix are defined up to multiplication by a complex number L, such that |L|=1. 
-- ALGLIB --
             Copyright 2005, 23 March 2007 by Bochkanov Sergey
        *************************************************************************/
        public static bool hmatrixevd(complex[,] a,
            int n,
            int zneeded,
            bool isupper,
            ref double[] d,
            ref complex[,] z)
        {
            complex[] tau = new complex[0];
            double[] e = new double[0];
            double[,] t = new double[0,0];
            double[,] qz = new double[0,0];
            complex[,] q = new complex[0,0];

            a = (complex[,])a.Clone();
            d = new double[0];
            z = new complex[0,0];
            alglib.ap.assert(zneeded==0 || zneeded==1, "HermitianEVD: incorrect ZNeeded");

            //
            // Reduce to tridiagonal form
            //
            ortfac.hmatrixtd(ref a, n, isupper, ref tau, ref d, ref e);
            if( zneeded==1 )
            {
                //
                // Q is kept separately; the tridiagonal solver is asked (mode 2)
                // to return its own eigenvector matrix in T
                //
                ortfac.hmatrixtdunpackq(a, n, isupper, tau, ref q);
                zneeded = 2;
            }

            //
            // TDEVD
            //
            bool converged = smatrixtdevd(ref d, e, n, zneeded, ref t);
            if( !converged || zneeded==0 )
            {
                return converged;
            }

            //
            // Eigenvectors are needed
            // Calculate Z = Q*T = Re(Q)*T + i*Im(Q)*T
            //
            // QZ is N x 2N: columns [0,N) hold one real component of Q,
            // columns [N,2N) receive its product with T.
            //
            z = new complex[n, n];
            qz = new double[n, 2*n];
            for(int part=0; part<2; part++)
            {
                bool realpart = part==0;

                //
                // Load Re(Q) on the first pass, Im(Q) on the second
                //
                for(int row=0; row<n; row++)
                {
                    for(int col=0; col<n; col++)
                    {
                        qz[row,col] = realpart ? q[row,col].x : q[row,col].y;
                    }
                }
                ablas.rmatrixgemm(n, n, n, 1.0, qz, 0, 0, 0, t, 0, 0, 0, 0.0, qz, 0, n);

                //
                // Store the product into the matching component of Z
                //
                for(int row=0; row<n; row++)
                {
                    for(int col=0; col<n; col++)
                    {
                        if( realpart )
                        {
                            z[row,col].x = qz[row,n+col];
                        }
                        else
                        {
                            z[row,col].y = qz[row,n+col];
                        }
                    }
                }
            }
            return converged;
        }


        /*************************************************************************
        Subroutine for finding the eigenvalues (and eigenvectors) of a Hermitian
        matrix in a given half-interval (A, B] by using a bisection and inverse
        iteration

        Input parameters:
            A       -   Hermitian matrix which is given by its upper or lower
                        triangular part. Array whose indexes range within
                        [0..N-1, 0..N-1].
            N       -   size of matrix A.
            ZNeeded -   flag controlling whether the eigenvectors are needed or not.
If ZNeeded is equal to: * 0, the eigenvectors are not returned; * 1, the eigenvectors are returned. IsUpper - storage format of matrix A. B1, B2 - half-interval (B1, B2] to search eigenvalues in. Output parameters: M - number of eigenvalues found in a given half-interval, M>=0 W - array of the eigenvalues found. Array whose index ranges within [0..M-1]. Z - if ZNeeded is equal to: * 0, Z hasn’t changed; * 1, Z contains eigenvectors. Array whose indexes range within [0..N-1, 0..M-1]. The eigenvectors are stored in the matrix columns. Result: True, if successful. M contains the number of eigenvalues in the given half-interval (could be equal to 0), W contains the eigenvalues, Z contains the eigenvectors (if needed). False, if the bisection method subroutine wasn't able to find the eigenvalues in the given interval or if the inverse iteration subroutine wasn't able to find all the corresponding eigenvectors. In that case, the eigenvalues and eigenvectors are not returned, M is equal to 0. Note: eigenvectors of a Hermitian matrix are defined up to multiplication by a complex number L, such that |L|=1. -- ALGLIB -- Copyright 07.01.2006, 24.03.2007 by Bochkanov Sergey. 
*************************************************************************/
        public static bool hmatrixevdr(complex[,] a,
            int n,
            int zneeded,
            bool isupper,
            double b1,
            double b2,
            ref int m,
            ref double[] w,
            ref complex[,] z)
        {
            complex[,] q = new complex[0,0];
            double[,] t = new double[0,0];
            complex[] tau = new complex[0];
            double[] e = new double[0];

            a = (complex[,])a.Clone();
            m = 0;
            w = new double[0];
            z = new complex[0,0];
            alglib.ap.assert(zneeded==0 || zneeded==1, "HermitianEigenValuesAndVectorsInInterval: incorrect ZNeeded");

            //
            // Reduce to tridiagonal form
            //
            ortfac.hmatrixtd(ref a, n, isupper, ref tau, ref w, ref e);
            if( zneeded==1 )
            {
                //
                // Q is kept separately; the tridiagonal solver is asked (mode 2)
                // to return its own eigenvectors in T
                //
                ortfac.hmatrixtdunpackq(a, n, isupper, tau, ref q);
                zneeded = 2;
            }

            //
            // Bisection and inverse iteration
            //
            bool succeeded = smatrixtdevdr(ref w, e, n, zneeded, b1, b2, ref m, ref t);
            if( !succeeded || zneeded==0 || m==0 )
            {
                return succeeded;
            }

            //
            // Eigenvectors are needed
            // Calculate Z = Q*T = Re(Q)*T + i*Im(Q)*T, one row of Z at a time
            //
            double[] rowacc = new double[m];
            z = new complex[n, m];
            for(int row=0; row<n; row++)
            {
                for(int part=0; part<2; part++)
                {
                    //
                    // part 0 - real component, part 1 - imaginary component
                    //
                    bool realpart = part==0;
                    for(int col=0; col<m; col++)
                    {
                        rowacc[col] = 0;
                    }
                    for(int k=0; k<n; k++)
                    {
                        double coef = realpart ? q[row,k].x : q[row,k].y;
                        for(int col=0; col<m; col++)
                        {
                            rowacc[col] = rowacc[col] + coef*t[k,col];
                        }
                    }
                    for(int col=0; col<m; col++)
                    {
                        if( realpart )
                        {
                            z[row,col].x = rowacc[col];
                        }
                        else
                        {
                            z[row,col].y = rowacc[col];
                        }
                    }
                }
            }
            return succeeded;
        }


        /*************************************************************************
        Subroutine for finding the eigenvalues and eigenvectors of a Hermitian
        matrix with given indexes by using bisection and inverse iteration methods

        Input parameters:
            A       -   Hermitian matrix which is given by its upper or lower
                        triangular part.
                        Array whose indexes range within [0..N-1, 0..N-1].
            N       -   size of matrix A.
ZNeeded - flag controlling whether the eigenvectors are needed or not. If ZNeeded is equal to: * 0, the eigenvectors are not returned; * 1, the eigenvectors are returned. IsUpper - storage format of matrix A. I1, I2 - index interval for searching (from I1 to I2). 0 <= I1 <= I2 <= N-1. Output parameters: W - array of the eigenvalues found. Array whose index ranges within [0..I2-I1]. Z - if ZNeeded is equal to: * 0, Z hasn’t changed; * 1, Z contains eigenvectors. Array whose indexes range within [0..N-1, 0..I2-I1]. In that case, the eigenvectors are stored in the matrix columns. Result: True, if successful. W contains the eigenvalues, Z contains the eigenvectors (if needed). False, if the bisection method subroutine wasn't able to find the eigenvalues in the given interval or if the inverse iteration subroutine wasn't able to find all the corresponding eigenvectors. In that case, the eigenvalues and eigenvectors are not returned. Note: eigenvectors of a Hermitian matrix are defined up to multiplication by a complex number L, such that |L|=1. -- ALGLIB -- Copyright 07.01.2006, 24.03.2007 by Bochkanov Sergey. 
*************************************************************************/
        public static bool hmatrixevdi(complex[,] a,
            int n,
            int zneeded,
            bool isupper,
            int i1,
            int i2,
            ref double[] w,
            ref complex[,] z)
        {
            complex[,] q = new complex[0,0];
            double[,] t = new double[0,0];
            complex[] tau = new complex[0];
            double[] e = new double[0];

            a = (complex[,])a.Clone();
            w = new double[0];
            z = new complex[0,0];
            alglib.ap.assert(zneeded==0 || zneeded==1, "HermitianEigenValuesAndVectorsByIndexes: incorrect ZNeeded");

            //
            // Reduce to tridiagonal form
            //
            ortfac.hmatrixtd(ref a, n, isupper, ref tau, ref w, ref e);
            if( zneeded==1 )
            {
                //
                // Q is kept separately; the tridiagonal solver is asked (mode 2)
                // to return its own eigenvectors in T
                //
                ortfac.hmatrixtdunpackq(a, n, isupper, tau, ref q);
                zneeded = 2;
            }

            //
            // Bisection and inverse iteration
            //
            bool succeeded = smatrixtdevdi(ref w, e, n, zneeded, i1, i2, ref t);
            if( !succeeded || zneeded==0 )
            {
                return succeeded;
            }

            //
            // Eigenvectors are needed
            // Calculate Z = Q*T = Re(Q)*T + i*Im(Q)*T, one row of Z at a time.
            // The number of columns is the length of the index range [I1, I2].
            //
            int count = i2-i1+1;
            double[] rowacc = new double[count];
            z = new complex[n, count];
            for(int row=0; row<n; row++)
            {
                for(int part=0; part<2; part++)
                {
                    //
                    // part 0 - real component, part 1 - imaginary component
                    //
                    bool realpart = part==0;
                    for(int col=0; col<count; col++)
                    {
                        rowacc[col] = 0;
                    }
                    for(int k=0; k<n; k++)
                    {
                        double coef = realpart ? q[row,k].x : q[row,k].y;
                        for(int col=0; col<count; col++)
                        {
                            rowacc[col] = rowacc[col] + coef*t[k,col];
                        }
                    }
                    for(int col=0; col<count; col++)
                    {
                        if( realpart )
                        {
                            z[row,col].x = rowacc[col];
                        }
                        else
                        {
                            z[row,col].y = rowacc[col];
                        }
                    }
                }
            }
            return succeeded;
        }


        /*************************************************************************
        Finding the eigenvalues and eigenvectors of a tridiagonal symmetric matrix

        The algorithm finds the eigen pairs of a tridiagonal symmetric matrix by
        using an QL/QR algorithm with implicit shifts.

        COMMERCIAL EDITION OF ALGLIB:

          ! Commercial version of ALGLIB includes one important improvement of
          ! this function, which can be used from C++ and C#:
          !
* Intel MKL support (lightweight Intel MKL is shipped with ALGLIB) ! ! Intel MKL gives approximately constant (with respect to number of ! worker threads) acceleration factor which depends on CPU being used, ! problem size and "baseline" ALGLIB edition which is used for ! comparison. ! ! Generally, commercial ALGLIB is several times faster than open-source ! generic C edition, and many times faster than open-source C# edition. ! ! Multithreaded acceleration is NOT supported for this function. ! ! We recommend you to read 'Working with commercial version' section of ! ALGLIB Reference Manual in order to find out how to use performance- ! related features provided by commercial edition of ALGLIB. Input parameters: D - the main diagonal of a tridiagonal matrix. Array whose index ranges within [0..N-1]. E - the secondary diagonal of a tridiagonal matrix. Array whose index ranges within [0..N-2]. N - size of matrix A. ZNeeded - flag controlling whether the eigenvectors are needed or not. If ZNeeded is equal to: * 0, the eigenvectors are not needed; * 1, the eigenvectors of a tridiagonal matrix are multiplied by the square matrix Z. It is used if the tridiagonal matrix is obtained by the similarity transformation of a symmetric matrix; * 2, the eigenvectors of a tridiagonal matrix replace the square matrix Z; * 3, matrix Z contains the first row of the eigenvectors matrix. Z - if ZNeeded=1, Z contains the square matrix by which the eigenvectors are multiplied. Array whose indexes range within [0..N-1, 0..N-1]. Output parameters: D - eigenvalues in ascending order. Array whose index ranges within [0..N-1]. Z - if ZNeeded is equal to: * 0, Z hasn’t changed; * 1, Z contains the product of a given matrix (from the left) and the eigenvectors matrix (from the right); * 2, Z contains the eigenvectors. * 3, Z contains the first row of the eigenvectors matrix. If ZNeeded<3, Z is the array whose indexes range within [0..N-1, 0..N-1]. 
In that case, the eigenvectors are stored in the matrix columns.
                        If ZNeeded=3, Z is the array whose indexes range within [0..0, 0..N-1].

        Result:
            True, if the algorithm has converged.
            False, if the algorithm hasn't converged.

          -- LAPACK routine (version 3.0) --
             Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
             Courant Institute, Argonne National Lab, and Rice University
             September 30, 1994
        *************************************************************************/
        public static bool smatrixtdevd(ref double[] d,
            double[] e,
            int n,
            int zneeded,
            ref double[,] z)
        {
            bool result = new bool();
            double[] d1 = new double[0];
            double[] e1 = new double[0];
            double[] ex = new double[0];
            double[,] z1 = new double[0,0];
            int i = 0;
            int j = 0;

            e = (double[])e.Clone();
            alglib.ap.assert(n>=1, "SMatrixTDEVD: N<=0");
            alglib.ap.assert(zneeded>=0 && zneeded<=3, "SMatrixTDEVD: incorrect ZNeeded");
            result = false;

            //
            // Preprocess Z: make ZNeeded equal to 0, 1 or 3.
            // Ensure that memory for Z is allocated.
            //
            if( zneeded==2 )
            {
                //
                // Load identity to Z; "replace Z by eigenvectors" then becomes
                // "multiply identity by eigenvectors", i.e. mode 1
                //
                apserv.rmatrixsetlengthatleast(ref z, n, n);
                for(i=0; i<=n-1; i++)
                {
                    for(j=0; j<=n-1; j++)
                    {
                        z[i,j] = 0.0;
                    }
                    z[i,i] = 1.0;
                }
                zneeded = 1;
            }
            if( zneeded==3 )
            {
                //
                // Allocate memory
                //
                apserv.rmatrixsetlengthatleast(ref z, 1, n);
            }

            //
            // Try to solve problem with MKL.
            // EX is an N-element copy of the N-1 off-diagonal entries.
            //
            ex = new double[n];
            for(i=0; i<=n-2; i++)
            {
                ex[i] = e[i];
            }
            if( ablasmkl.smatrixtdevdmkl(d, ex, n, zneeded, z, ref result) )
            {
                return result;
            }

            //
            // Prepare 1-based task (element 0 of every array is unused)
            //
            d1 = new double[n+1];
            e1 = new double[n+1];
            for(i=1; i<=n; i++)
            {
                d1[i] = d[i-1];
            }
            for(i=1; i<=n-1; i++)
            {
                e1[i] = e[i-1];
            }
            if( zneeded==1 )
            {
                z1 = new double[n+1, n+1];
                for(i=1; i<=n; i++)
                {
                    for(j=1; j<=n; j++)
                    {
                        z1[i,j] = z[i-1,j-1];
                    }
                }
            }

            //
            // Solve 1-based task
            //
            result = tridiagonalevd(ref d1, e1, n, zneeded, ref z1);
            if( !result )
            {
                return result;
            }

            //
            // Convert back to 0-based result.
            //
            // At this point ZNeeded is 0, 1 or 3 (2 was rewritten to 1 above),
            // so only those cases are handled here.
            //
            for(i=1; i<=n; i++)
            {
                d[i-1] = d1[i];
            }
            if( zneeded==1 )
            {
                for(i=1; i<=n; i++)
                {
                    for(j=1; j<=n; j++)
                    {
                        z[i-1,j-1] = z1[i,j];
                    }
                }
            }
            else if( zneeded==3 )
            {
                //
                // Only the first row of the eigenvector matrix is returned
                //
                z = new double[1, n];
                for(j=1; j<=n; j++)
                {
                    z[0,j-1] = z1[1,j];
                }
            }
            return result;
        }


        /*************************************************************************
        Subroutine for finding the tridiagonal matrix eigenvalues/vectors in a
        given half-interval (A, B] by using bisection and inverse iteration.

        Input parameters:
            D       -   the main diagonal of a tridiagonal matrix.
                        Array whose index ranges within [0..N-1].
            E       -   the secondary diagonal of a tridiagonal matrix.
                        Array whose index ranges within [0..N-2].
N - size of matrix, N>=0. ZNeeded - flag controlling whether the eigenvectors are needed or not. If ZNeeded is equal to: * 0, the eigenvectors are not needed; * 1, the eigenvectors of a tridiagonal matrix are multiplied by the square matrix Z. It is used if the tridiagonal matrix is obtained by the similarity transformation of a symmetric matrix. * 2, the eigenvectors of a tridiagonal matrix replace matrix Z. A, B - half-interval (A, B] to search eigenvalues in. Z - if ZNeeded is equal to: * 0, Z isn't used and remains unchanged; * 1, Z contains the square matrix (array whose indexes range within [0..N-1, 0..N-1]) which reduces the given symmetric matrix to tridiagonal form; * 2, Z isn't used (but changed on the exit). Output parameters: D - array of the eigenvalues found. Array whose index ranges within [0..M-1]. M - number of eigenvalues found in the given half-interval (M>=0). Z - if ZNeeded is equal to: * 0, doesn't contain any information; * 1, contains the product of a given NxN matrix Z (from the left) and NxM matrix of the eigenvectors found (from the right). Array whose indexes range within [0..N-1, 0..M-1]. * 2, contains the matrix of the eigenvectors found. Array whose indexes range within [0..N-1, 0..M-1]. Result: True, if successful. In that case, M contains the number of eigenvalues in the given half-interval (could be equal to 0), D contains the eigenvalues, Z contains the eigenvectors (if needed). It should be noted that the subroutine changes the size of arrays D and Z. False, if the bisection method subroutine wasn't able to find the eigenvalues in the given interval or if the inverse iteration subroutine wasn't able to find all the corresponding eigenvectors. In that case, the eigenvalues and eigenvectors are not returned, M is equal to 0. 
-- ALGLIB --
             Copyright 31.03.2008 by Bochkanov Sergey
        *************************************************************************/
        public static bool smatrixtdevdr(ref double[] d, double[] e, int n, int zneeded, double a, double b, ref int m, ref double[,] z)
        {
            int errorcode = 0;
            int nsplit = 0;
            int steinstatus = 0;
            int[] iblock = new int[0];
            int[] isplit = new int[0];
            int[] ifail = new int[0];
            double[] diag1 = new double[0];
            double[] offdiag1 = new double[0];
            double[] w = new double[0];
            double[,] vecs = new double[0,0];
            bool found = new bool();

            m = 0;
            alglib.ap.assert(zneeded>=0 && zneeded<=2, "SMatrixTDEVDR: incorrect ZNeeded!");

            //
            // Empty half-interval or empty matrix: nothing to search for,
            // which is reported as success with M=0.
            //
            if( (double)(b)<=(double)(a) || n<=0 )
            {
                m = 0;
                return true;
            }

            //
            // 1-based copies of the diagonals
            //
            diag1 = new double[n+1];
            for(int p=1; p<=n; p++)
            {
                diag1[p] = d[p-1];
            }
            if( n>1 )
            {
                offdiag1 = new double[n];
                for(int p=1; p<=n-1; p++)
                {
                    offdiag1[p] = e[p-1];
                }
            }

            //
            // Any other mode is rejected here
            //
            if( zneeded!=0 && zneeded!=1 && zneeded!=2 )
            {
                return false;
            }

            //
            // Bisection. The IORDER argument is 1 when only eigenvalues are
            // requested and 2 when eigenvectors follow.
            //
            found = internalbisectioneigenvalues(diag1, offdiag1, n, 2, (zneeded==0 ? 1 : 2), a, b, 0, 0, -1, ref w, ref m, ref nsplit, ref iblock, ref isplit, ref errorcode);
            if( !found || m==0 )
            {
                m = 0;
                return found;
            }

            //
            // No eigenvectors: just publish W[1..M] as D[0..M-1]
            //
            if( zneeded==0 )
            {
                d = new double[m];
                for(int p=0; p<=m-1; p++)
                {
                    d[p] = w[p+1];
                }
                return found;
            }

            //
            // Eigenvectors by inverse iteration
            //
            internaldstein(n, diag1, offdiag1, m, w, iblock, isplit, ref vecs, ref ifail, ref steinstatus);
            if( steinstatus!=0 )
            {
                m = 0;
                return false;
            }

            //
            // Selection sort of the eigenvalues (ascending); the columns of
            // VECS are swapped together with their eigenvalues.
            //
            for(int pos=1; pos<=m; pos++)
            {
                int best = pos;
                for(int cand=pos; cand<=m; cand++)
                {
                    if( (double)(w[cand])<(double)(w[best]) )
                    {
                        best = cand;
                    }
                }
                double held = w[pos];
                w[pos] = w[best];
                w[best] = held;
                for(int row=1; row<=n; row++)
                {
                    held = vecs[row,pos];
                    vecs[row,pos] = vecs[row,best];
                    vecs[row,best] = held;
                }
            }

            if( zneeded==1 )
            {
                //
                // VECS := Z*VECS. A transposed snapshot is taken first
                // because VECS is overwritten while the product is formed.
                //
                double[,] vecst = new double[m+1, n+1];
                for(int col=1; col<=m; col++)
                {
                    for(int row=1; row<=n; row++)
                    {
                        vecst[col,row] = vecs[row,col];
                    }
                }
                for(int row=1; row<=n; row++)
                {
                    for(int col=1; col<=m; col++)
                    {
                        double acc = 0.0;
                        for(int t=0; t<=n-1; t++)
                        {
                            acc += z[row-1,t]*vecst[col,t+1];
                        }
                        vecs[row,col] = acc;
                    }
                }
            }

            //
            // Publish 0-based results: Z is N x M, D has M elements
            //
            z = new double[n, m];
            for(int col=1; col<=m; col++)
            {
                for(int row=0; row<=n-1; row++)
                {
                    z[row,col-1] = vecs[row+1,col];
                }
            }
            d = new double[m];
            for(int col=1; col<=m; col++)
            {
                d[col-1] = w[col];
            }
            return found;
        }


        /*************************************************************************
        Subroutine for finding tridiagonal matrix eigenvalues/vectors with given
        indexes (in ascending order) by using the bisection and inverse iteration.

        Input parameters:
            D       -   the main diagonal of a tridiagonal matrix.
                        Array whose index ranges within [0..N-1].
            E       -   the secondary diagonal of a tridiagonal matrix.
                        Array whose index ranges within [0..N-2].
            N       -   size of matrix. N>=0.
            ZNeeded -   flag controlling whether the eigenvectors are needed or not.
                        If ZNeeded is equal to:
                         * 0, the eigenvectors are not needed;
                         * 1, the eigenvectors of a tridiagonal matrix
                           are multiplied by the square matrix Z. It is used if the
                           tridiagonal matrix is obtained by the similarity
                           transformation of a symmetric matrix.
                         * 2, the eigenvectors of a tridiagonal matrix replace matrix Z.
            I1, I2  -   index interval for searching (from I1 to I2).
                        0 <= I1 <= I2 <= N-1.
            Z       -   if ZNeeded is equal to:
                         * 0, Z isn't used and remains unchanged;
                         * 1, Z contains the square matrix (array whose indexes range
                           within [0..N-1, 0..N-1]) which reduces the given symmetric
                           matrix to tridiagonal form;
                         * 2, Z isn't used (but changed on the exit).

        Output parameters:
            D       -   array of the eigenvalues found.
                        Array whose index ranges within [0..I2-I1].
            Z       -   if ZNeeded is equal to:
                         * 0, doesn't contain any information;
                         * 1, contains the product of a given NxN matrix Z (from the
                           left) and Nx(I2-I1+1) matrix of the eigenvectors found
                           (from the right). Array whose indexes range within
                           [0..N-1, 0..I2-I1].
                         * 2, contains the matrix of the eigenvectors found.
                           Array whose indexes range within [0..N-1, 0..I2-I1].

        Result:

            True, if successful. In that case, D contains the eigenvalues,
            Z contains the eigenvectors (if needed).
            It should be noted that the subroutine changes the size of arrays D
            and Z.

            False, if the bisection method subroutine wasn't able to find the
            eigenvalues in the given interval or if the inverse iteration
            subroutine wasn't able to find all the corresponding eigenvectors.
            In that case, the eigenvalues and eigenvectors are not returned.
-- ALGLIB --
             Copyright 25.12.2005 by Bochkanov Sergey
        *************************************************************************/
        public static bool smatrixtdevdi(ref double[] d, double[] e, int n, int zneeded, int i1, int i2, ref double[,] z)
        {
            bool result = new bool();
            int errorcode = 0;
            int nsplit = 0;
            int i = 0;
            int j = 0;
            int k = 0;
            int m = 0;
            int cr = 0;
            int[] iblock = new int[0];
            int[] isplit = new int[0];
            int[] ifail = new int[0];
            double[] w = new double[0];
            double[] d1 = new double[0];
            double[] e1 = new double[0];
            double[,] z2 = new double[0,0];
            double[,] z3 = new double[0,0];
            double v = 0;

            //
            // NOTE(review): the text between "i2" and "1 ) {" had been lost from
            // this file (everything from '<' up to the next '>' was stripped).
            // The I2<N check, the D1 copy and the N>1 guard below are restored to
            // mirror SMatrixTDEVDR; the assertion message is reconstructed and
            // should be confirmed against the upstream ALGLIB 3.9.0 sources.
            //
            alglib.ap.assert((0<=i1 && i1<=i2) && i2<n, "SMatrixTDEVDI: incorrect I1/I2!");

            //
            // Copy D,E to D1, E1 (1-based)
            //
            d1 = new double[n+1];
            for(i=1; i<=n; i++)
            {
                d1[i] = d[i-1];
            }
            if( n>1 )
            {
                e1 = new double[n-1+1];
                for(i=1; i<=n-1; i++)
                {
                    e1[i] = e[i-1];
                }
            }

            //
            // Modes other than 0, 1, 2 are reported as failure
            //
            if( zneeded!=0 && zneeded!=1 && zneeded!=2 )
            {
                result = false;
                return result;
            }

            //
            // Bisection for eigenvalues with 1-based indexes I1+1..I2+1.
            // The IORDER argument is 1 when only eigenvalues are requested
            // and 2 when eigenvectors follow.
            //
            result = internalbisectioneigenvalues(d1, e1, n, 3, (zneeded==0 ? 1 : 2), 0, 0, i1+1, i2+1, -1, ref w, ref m, ref nsplit, ref iblock, ref isplit, ref errorcode);
            if( !result )
            {
                return result;
            }
            if( m!=i2-i1+1 )
            {
                // not every requested eigenvalue was found
                result = false;
                return result;
            }

            //
            // No eigen vectors
            //
            if( zneeded==0 )
            {
                d = new double[m-1+1];
                for(i=1; i<=m; i++)
                {
                    d[i-1] = w[i];
                }
                return result;
            }

            //
            // Find eigen vectors by inverse iteration
            //
            internaldstein(n, d1, e1, m, w, iblock, isplit, ref z2, ref ifail, ref cr);
            if( cr!=0 )
            {
                result = false;
                return result;
            }

            //
            // Sort eigen values and vectors (selection sort, ascending)
            //
            for(i=1; i<=m; i++)
            {
                k = i;
                for(j=i; j<=m; j++)
                {
                    if( (double)(w[j])<(double)(w[k]) )
                    {
                        k = j;
                    }
                }
                v = w[i];
                w[i] = w[k];
                w[k] = v;
                for(j=1; j<=n; j++)
                {
                    v = z2[j,i];
                    z2[j,i] = z2[j,k];
                    z2[j,k] = v;
                }
            }

            //
            // Eigen vectors are multiplied by Z: Z2 := Z*Z2.
            // Z3 holds a transposed snapshot because Z2 is overwritten
            // while the product is being formed.
            //
            if( zneeded==1 )
            {
                z3 = new double[m+1, n+1];
                for(i=1; i<=m; i++)
                {
                    for(j=1; j<=n; j++)
                    {
                        z3[i,j] = z2[j,i];
                    }
                }
                for(i=1; i<=n; i++)
                {
                    for(j=1; j<=m; j++)
                    {
                        v = 0.0;
                        for(k=0; k<=n-1; k++)
                        {
                            v += z[i-1,k]*z3[j,k+1];
                        }
                        z2[i,j] = v;
                    }
                }
            }

            //
            // Store Z (N x M, 0-based)
            //
            z = new double[n-1+1, m-1+1];
            for(i=1; i<=m; i++)
            {
                for(j=0; j<=n-1; j++)
                {
                    z[j,i-1] = z2[j+1,i];
                }
            }

            //
            // Store W
            //
            d = new double[m-1+1];
            for(i=1; i<=m; i++)
            {
                d[i-1] = w[i];
            }
            return result;
        }


        /*************************************************************************
        Finding eigenvalues and eigenvectors of a general (unsymmetric) matrix

        COMMERCIAL EDITION OF ALGLIB:

          ! Commercial version of ALGLIB includes one  important  improvement   of
          ! this function, which can be used from C++ and C#:
          ! * Intel MKL support (lightweight Intel MKL is shipped with ALGLIB)
          !
          ! Intel MKL gives approximately constant  (with  respect  to  number  of
          ! worker threads) acceleration factor which depends on CPU  being  used,
          ! problem  size  and  "baseline"  ALGLIB  edition  which  is  used   for
          ! comparison. Speed-up provided by MKL for this particular problem (EVD)
          ! is really high, because  MKL  uses combination of (a) better low-level
          !
optimizations, and (b) better EVD algorithms.
          !
          ! On one particular SSE-capable  machine  for  N=1024,  commercial  MKL-
          ! -capable ALGLIB was:
          ! * 7-10 times faster than open source "generic C" version
          ! * 15-18 times faster than "pure C#" version
          !
          ! Multithreaded acceleration is NOT supported for this function.
          !
          ! We recommend you to read 'Working with commercial version' section  of
          ! ALGLIB Reference Manual in order to find out how to  use  performance-
          ! related features provided by commercial edition of ALGLIB.

        The algorithm finds eigenvalues and eigenvectors of a general matrix by
        using the QR algorithm with multiple shifts. The algorithm can find
        eigenvalues and both left and right eigenvectors.

        The right eigenvector is a vector x such that A*x = w*x, and the left
        eigenvector is a vector y such that y'*A = w*y' (here y' implies a complex
        conjugate transposition of vector y).

        Input parameters:
            A       -   matrix. Array whose indexes range within [0..N-1, 0..N-1].
            N       -   size of matrix A.
            VNeeded -   flag controlling whether eigenvectors are needed or not.
                        If VNeeded is equal to:
                         * 0, eigenvectors are not returned;
                         * 1, right eigenvectors are returned;
                         * 2, left eigenvectors are returned;
                         * 3, both left and right eigenvectors are returned.

        Output parameters:
            WR      -   real parts of eigenvalues.
                        Array whose index ranges within [0..N-1].
            WI      -   imaginary parts of eigenvalues.
                        Array whose index ranges within [0..N-1].
            VL, VR  -   arrays of left and right eigenvectors (if they are needed).
                        If WI[i]=0, the respective eigenvalue is a real number,
                        and it corresponds to the column number I of matrices VL/VR.
If WI[i]>0, we have a pair of complex conjugate numbers with positive and negative imaginary parts: the first eigenvalue WR[i] + sqrt(-1)*WI[i]; the second eigenvalue WR[i+1] + sqrt(-1)*WI[i+1]; WI[i]>0 WI[i+1] = -WI[i] < 0 In that case, the eigenvector corresponding to the first eigenvalue is located in i and i+1 columns of matrices VL/VR (the column number i contains the real part, and the column number i+1 contains the imaginary part), and the vector corresponding to the second eigenvalue is a complex conjugate to the first vector. Arrays whose indexes range within [0..N-1, 0..N-1]. Result: True, if the algorithm has converged. False, if the algorithm has not converged. Note 1: Some users may ask the following question: what if WI[N-1]>0? WI[N] must contain an eigenvalue which is complex conjugate to the N-th eigenvalue, but the array has only size N? The answer is as follows: such a situation cannot occur because the algorithm finds a pairs of eigenvalues, therefore, if WI[i]>0, I is strictly less than N-1. Note 2: The algorithm performance depends on the value of the internal parameter NS of the InternalSchurDecomposition subroutine which defines the number of shifts in the QR algorithm (similarly to the block width in block-matrix algorithms of linear algebra). If you require maximum performance on your machine, it is recommended to adjust this parameter manually. See also the InternalTREVC subroutine. The algorithm is based on the LAPACK 3.0 library. 
*************************************************************************/
        public static bool rmatrixevd(double[,] a, int n, int vneeded, ref double[] wr, ref double[] wi, ref double[,] vl, ref double[,] vr)
        {
            bool result = new bool();
            double[,] s = new double[0,0];
            double[,] dummy = new double[0,0];
            double[] tau = new double[0];
            int i = 0;
            int info = 0;
            bool[] sel1 = new bool[0];
            int m1 = 0;
            int i_ = 0;

            // work on a private copy of A; outputs start out empty
            a = (double[,])a.Clone();
            wr = new double[0];
            wi = new double[0];
            vl = new double[0,0];
            vr = new double[0,0];

            alglib.ap.assert(vneeded>=0 && vneeded<=3, "RMatrixEVD: incorrect VNeeded!");
            if( vneeded==0 )
            {
                //
                // Eigen values only
                //
                ortfac.rmatrixhessenberg(ref a, n, ref tau);
                hsschur.rmatrixinternalschurdecomposition(a, n, 0, 0, ref wr, ref wi, ref dummy, ref info);
                result = info==0;
                return result;
            }

            //
            // Eigen values and vectors
            //
            ortfac.rmatrixhessenberg(ref a, n, ref tau);
            ortfac.rmatrixhessenbergunpackq(a, n, tau, ref s);
            hsschur.rmatrixinternalschurdecomposition(a, n, 1, 1, ref wr, ref wi, ref s, ref info);
            result = info==0;
            if( !result )
            {
                return result;
            }

            //
            // VR and/or VL are seeded with a copy of S before the
            // rmatrixinternaltrevc() call below.
            //
            if( vneeded==1 || vneeded==3 )
            {
                vr = new double[n, n];
                for(i=0; i<=n-1; i++)
                {
                    for(i_=0; i_<=n-1;i_++)
                    {
                        vr[i,i_] = s[i,i_];
                    }
                }
            }
            if( vneeded==2 || vneeded==3 )
            {
                vl = new double[n, n];
                for(i=0; i<=n-1; i++)
                {
                    for(i_=0; i_<=n-1;i_++)
                    {
                        vl[i,i_] = s[i,i_];
                    }
                }
            }
            rmatrixinternaltrevc(a, n, vneeded, 1, sel1, ref vl, ref vr, ref m1, ref info);
            result = info==0;
            return result;
        }


        /*************************************************************************
        Internal 1-based solver for the symmetric tridiagonal eigenproblem
        (QL/QR iterations, see the section comments in the body).

        Input parameters:
            D       -   main diagonal, elements [1..N] are used.
            E       -   secondary diagonal, elements [1..N-1] are used. The
                        routine works on a private copy of E.
            N       -   size of the matrix.
            ZNeeded -   0: Z is not referenced;
                        1: Z[1..N,1..N] is updated by the rotations;
                        2: Z is reallocated and initialized with identity;
                        3: Z is reallocated as a single row (1,0,...,0).
            Z       -   see ZNeeded.

        Output parameters:
            D       -   eigenvalues in ascending order (on success).
            Z       -   see ZNeeded.

        Result:
            True on success, False when the total iteration count reaches
            N*30 before all eigenvalues have converged.

        NOTE(review): two spans of this routine had been lost from the file
        (text from '<' up to the next '>' was stripped): "m<lend ... m==l+1 ...
        zneeded>" in the QL branch and "k<i ... d[k+1]>" in the heap sort. They
        are restored below to mirror the intact QR branch and the usual
        sift-down step; confirm against the upstream ALGLIB 3.9.0 sources.
        The (double) casts in comparisons are kept exactly as they were.
        *************************************************************************/
        private static bool tridiagonalevd(ref double[] d, double[] e, int n, int zneeded, ref double[,] z)
        {
            bool result = new bool();
            int maxit = 0;
            int i = 0;
            int ii = 0;
            int iscale = 0;
            int j = 0;
            int jtot = 0;
            int k = 0;
            int t = 0;
            int l = 0;
            int l1 = 0;
            int lend = 0;
            int lendm1 = 0;
            int lendp1 = 0;
            int lendsv = 0;
            int lm1 = 0;
            int lsv = 0;
            int m = 0;
            int mm1 = 0;
            int nm1 = 0;
            int nmaxit = 0;
            int tmpint = 0;
            double anorm = 0;
            double b = 0;
            double c = 0;
            double eps = 0;
            double eps2 = 0;
            double f = 0;
            double g = 0;
            double p = 0;
            double r = 0;
            double rt1 = 0;
            double rt2 = 0;
            double s = 0;
            double safmax = 0;
            double safmin = 0;
            double ssfmax = 0;
            double ssfmin = 0;
            double tst = 0;
            double tmp = 0;
            double[] work1 = new double[0];
            double[] work2 = new double[0];
            double[] workc = new double[0];
            double[] works = new double[0];
            double[] wtemp = new double[0];
            bool gotoflag = new bool();
            int zrows = 0;
            bool wastranspose = new bool();
            int i_ = 0;

            e = (double[])e.Clone();

            alglib.ap.assert(zneeded>=0 && zneeded<=3, "TridiagonalEVD: Incorrent ZNeeded");

            //
            // Quick return if possible
            //
            if( zneeded<0 || zneeded>3 )
            {
                result = false;
                return result;
            }
            result = true;
            if( n==0 )
            {
                return result;
            }
            if( n==1 )
            {
                if( zneeded==2 || zneeded==3 )
                {
                    z = new double[1+1, 1+1];
                    z[1,1] = 1;
                }
                return result;
            }
            maxit = 30;

            //
            // Initialize arrays
            //
            wtemp = new double[n+1];
            work1 = new double[n-1+1];
            work2 = new double[n-1+1];
            workc = new double[n+1];
            works = new double[n+1];

            //
            // Determine the unit roundoff and over/underflow thresholds.
            //
            eps = math.machineepsilon;
            eps2 = math.sqr(eps);
            safmin = math.minrealnumber;
            safmax = math.maxrealnumber;
            ssfmax = Math.Sqrt(safmax)/3;
            ssfmin = Math.Sqrt(safmin)/eps2;

            //
            // Prepare Z
            //
            // Here we are using transposition to get rid of column operations
            //
            wastranspose = false;
            zrows = 0;
            if( zneeded==1 )
            {
                zrows = n;
            }
            if( zneeded==2 )
            {
                zrows = n;
            }
            if( zneeded==3 )
            {
                zrows = 1;
            }
            if( zneeded==1 )
            {
                wastranspose = true;
                blas.inplacetranspose(ref z, 1, n, 1, n, ref wtemp);
            }
            if( zneeded==2 )
            {
                wastranspose = true;
                z = new double[n+1, n+1];
                for(i=1; i<=n; i++)
                {
                    for(j=1; j<=n; j++)
                    {
                        if( i==j )
                        {
                            z[i,j] = 1;
                        }
                        else
                        {
                            z[i,j] = 0;
                        }
                    }
                }
            }
            if( zneeded==3 )
            {
                wastranspose = false;
                z = new double[1+1, n+1];
                for(j=1; j<=n; j++)
                {
                    if( j==1 )
                    {
                        z[1,j] = 1;
                    }
                    else
                    {
                        z[1,j] = 0;
                    }
                }
            }
            nmaxit = n*maxit;
            jtot = 0;

            //
            // Determine where the matrix splits and choose QL or QR iteration
            // for each block, according to whether top or bottom diagonal
            // element is smaller.
            //
            l1 = 1;
            nm1 = n-1;
            while( true )
            {
                if( l1>n )
                {
                    break;
                }
                if( l1>1 )
                {
                    e[l1-1] = 0;
                }
                gotoflag = false;
                m = l1;
                if( l1<=nm1 )
                {
                    for(m=l1; m<=nm1; m++)
                    {
                        tst = Math.Abs(e[m]);
                        if( (double)(tst)==(double)(0) )
                        {
                            gotoflag = true;
                            break;
                        }
                        if( (double)(tst)<=(double)(Math.Sqrt(Math.Abs(d[m]))*Math.Sqrt(Math.Abs(d[m+1]))*eps) )
                        {
                            e[m] = 0;
                            gotoflag = true;
                            break;
                        }
                    }
                }
                if( !gotoflag )
                {
                    m = n;
                }

                //
                // label 30:
                //
                l = l1;
                lsv = l;
                lend = m;
                lendsv = lend;
                l1 = m+1;
                if( lend==l )
                {
                    continue;
                }

                //
                // Scale submatrix in rows and columns L to LEND
                //
                if( l==lend )
                {
                    anorm = Math.Abs(d[l]);
                }
                else
                {
                    anorm = Math.Max(Math.Abs(d[l])+Math.Abs(e[l]), Math.Abs(e[lend-1])+Math.Abs(d[lend]));
                    for(i=l+1; i<=lend-1; i++)
                    {
                        anorm = Math.Max(anorm, Math.Abs(d[i])+Math.Abs(e[i])+Math.Abs(e[i-1]));
                    }
                }
                iscale = 0;
                if( (double)(anorm)==(double)(0) )
                {
                    continue;
                }
                if( (double)(anorm)>(double)(ssfmax) )
                {
                    iscale = 1;
                    tmp = ssfmax/anorm;
                    tmpint = lend-1;
                    for(i_=l; i_<=lend;i_++)
                    {
                        d[i_] = tmp*d[i_];
                    }
                    for(i_=l; i_<=tmpint;i_++)
                    {
                        e[i_] = tmp*e[i_];
                    }
                }
                if( (double)(anorm)<(double)(ssfmin) )
                {
                    iscale = 2;
                    tmp = ssfmin/anorm;
                    tmpint = lend-1;
                    for(i_=l; i_<=lend;i_++)
                    {
                        d[i_] = tmp*d[i_];
                    }
                    for(i_=l; i_<=tmpint;i_++)
                    {
                        e[i_] = tmp*e[i_];
                    }
                }

                //
                // Choose between QL and QR iteration
                //
                if( (double)(Math.Abs(d[lend]))<(double)(Math.Abs(d[l])) )
                {
                    lend = lsv;
                    l = lendsv;
                }
                if( lend>l )
                {

                    //
                    // QL Iteration
                    //
                    // Look for small subdiagonal element.
                    //
                    while( true )
                    {
                        gotoflag = false;
                        if( l!=lend )
                        {
                            lendm1 = lend-1;
                            for(m=l; m<=lendm1; m++)
                            {
                                tst = math.sqr(Math.Abs(e[m]));
                                if( (double)(tst)<=(double)(eps2*Math.Abs(d[m])*Math.Abs(d[m+1])+safmin) )
                                {
                                    gotoflag = true;
                                    break;
                                }
                            }
                        }
                        if( !gotoflag )
                        {
                            m = lend;
                        }
                        if( m<lend )
                        {
                            e[m] = 0;
                        }
                        p = d[l];
                        if( m!=l )
                        {

                            //
                            // If remaining matrix is 2-by-2, use DLAE2 or SLAEV2
                            // to compute its eigensystem.
                            //
                            if( m==l+1 )
                            {
                                if( zneeded>0 )
                                {
                                    tdevdev2(d[l], e[l], d[l+1], ref rt1, ref rt2, ref c, ref s);
                                    work1[l] = c;
                                    work2[l] = s;
                                    workc[1] = work1[l];
                                    works[1] = work2[l];
                                    if( !wastranspose )
                                    {
                                        rotations.applyrotationsfromtheright(false, 1, zrows, l, l+1, workc, works, z, wtemp);
                                    }
                                    else
                                    {
                                        rotations.applyrotationsfromtheleft(false, l, l+1, 1, zrows, workc, works, z, wtemp);
                                    }
                                }
                                else
                                {
                                    tdevde2(d[l], e[l], d[l+1], ref rt1, ref rt2);
                                }
                                d[l] = rt1;
                                d[l+1] = rt2;
                                e[l] = 0;
                                l = l+2;
                                if( l<=lend )
                                {
                                    continue;
                                }

                                //
                                // GOTO 140
                                //
                                break;
                            }
                            if( jtot==nmaxit )
                            {

                                //
                                // GOTO 140
                                //
                                break;
                            }
                            jtot = jtot+1;

                            //
                            // Form shift.
                            //
                            g = (d[l+1]-p)/(2*e[l]);
                            r = tdevdpythag(g, 1);
                            g = d[m]-p+e[l]/(g+tdevdextsign(r, g));
                            s = 1;
                            c = 1;
                            p = 0;

                            //
                            // Inner loop
                            //
                            mm1 = m-1;
                            for(i=mm1; i>=l; i--)
                            {
                                f = s*e[i];
                                b = c*e[i];
                                rotations.generaterotation(g, f, ref c, ref s, ref r);
                                if( i!=m-1 )
                                {
                                    e[i+1] = r;
                                }
                                g = d[i+1]-p;
                                r = (d[i]-g)*s+2*c*b;
                                p = s*r;
                                d[i+1] = g+p;
                                g = c*r-b;

                                //
                                // If eigenvectors are desired, then save rotations.
                                //
                                if( zneeded>0 )
                                {
                                    work1[i] = c;
                                    work2[i] = -s;
                                }
                            }

                            //
                            // If eigenvectors are desired, then apply saved rotations.
                            //
                            if( zneeded>0 )
                            {
                                for(i=l; i<=m-1; i++)
                                {
                                    workc[i-l+1] = work1[i];
                                    works[i-l+1] = work2[i];
                                }
                                if( !wastranspose )
                                {
                                    rotations.applyrotationsfromtheright(false, 1, zrows, l, m, workc, works, z, wtemp);
                                }
                                else
                                {
                                    rotations.applyrotationsfromtheleft(false, l, m, 1, zrows, workc, works, z, wtemp);
                                }
                            }
                            d[l] = d[l]-p;
                            e[l] = g;
                            continue;
                        }

                        //
                        // Eigenvalue found.
                        //
                        d[l] = p;
                        l = l+1;
                        if( l<=lend )
                        {
                            continue;
                        }
                        break;
                    }
                }
                else
                {

                    //
                    // QR Iteration
                    //
                    // Look for small superdiagonal element.
                    //
                    while( true )
                    {
                        gotoflag = false;
                        if( l!=lend )
                        {
                            lendp1 = lend+1;
                            for(m=l; m>=lendp1; m--)
                            {
                                tst = math.sqr(Math.Abs(e[m-1]));
                                if( (double)(tst)<=(double)(eps2*Math.Abs(d[m])*Math.Abs(d[m-1])+safmin) )
                                {
                                    gotoflag = true;
                                    break;
                                }
                            }
                        }
                        if( !gotoflag )
                        {
                            m = lend;
                        }
                        if( m>lend )
                        {
                            e[m-1] = 0;
                        }
                        p = d[l];
                        if( m!=l )
                        {

                            //
                            // If remaining matrix is 2-by-2, use DLAE2 or SLAEV2
                            // to compute its eigensystem.
                            //
                            if( m==l-1 )
                            {
                                if( zneeded>0 )
                                {
                                    tdevdev2(d[l-1], e[l-1], d[l], ref rt1, ref rt2, ref c, ref s);
                                    work1[m] = c;
                                    work2[m] = s;
                                    workc[1] = c;
                                    works[1] = s;
                                    if( !wastranspose )
                                    {
                                        rotations.applyrotationsfromtheright(true, 1, zrows, l-1, l, workc, works, z, wtemp);
                                    }
                                    else
                                    {
                                        rotations.applyrotationsfromtheleft(true, l-1, l, 1, zrows, workc, works, z, wtemp);
                                    }
                                }
                                else
                                {
                                    tdevde2(d[l-1], e[l-1], d[l], ref rt1, ref rt2);
                                }
                                d[l-1] = rt1;
                                d[l] = rt2;
                                e[l-1] = 0;
                                l = l-2;
                                if( l>=lend )
                                {
                                    continue;
                                }
                                break;
                            }
                            if( jtot==nmaxit )
                            {
                                break;
                            }
                            jtot = jtot+1;

                            //
                            // Form shift.
                            //
                            g = (d[l-1]-p)/(2*e[l-1]);
                            r = tdevdpythag(g, 1);
                            g = d[m]-p+e[l-1]/(g+tdevdextsign(r, g));
                            s = 1;
                            c = 1;
                            p = 0;

                            //
                            // Inner loop
                            //
                            lm1 = l-1;
                            for(i=m; i<=lm1; i++)
                            {
                                f = s*e[i];
                                b = c*e[i];
                                rotations.generaterotation(g, f, ref c, ref s, ref r);
                                if( i!=m )
                                {
                                    e[i-1] = r;
                                }
                                g = d[i]-p;
                                r = (d[i+1]-g)*s+2*c*b;
                                p = s*r;
                                d[i] = g+p;
                                g = c*r-b;

                                //
                                // If eigenvectors are desired, then save rotations.
                                //
                                if( zneeded>0 )
                                {
                                    work1[i] = c;
                                    work2[i] = s;
                                }
                            }

                            //
                            // If eigenvectors are desired, then apply saved rotations.
                            //
                            if( zneeded>0 )
                            {
                                for(i=m; i<=l-1; i++)
                                {
                                    workc[i-m+1] = work1[i];
                                    works[i-m+1] = work2[i];
                                }
                                if( !wastranspose )
                                {
                                    rotations.applyrotationsfromtheright(true, 1, zrows, m, l, workc, works, z, wtemp);
                                }
                                else
                                {
                                    rotations.applyrotationsfromtheleft(true, m, l, 1, zrows, workc, works, z, wtemp);
                                }
                            }
                            d[l] = d[l]-p;
                            e[lm1] = g;
                            continue;
                        }

                        //
                        // Eigenvalue found.
                        //
                        d[l] = p;
                        l = l-1;
                        if( l>=lend )
                        {
                            continue;
                        }
                        break;
                    }
                }

                //
                // Undo scaling if necessary
                //
                if( iscale==1 )
                {
                    tmp = anorm/ssfmax;
                    tmpint = lendsv-1;
                    for(i_=lsv; i_<=lendsv;i_++)
                    {
                        d[i_] = tmp*d[i_];
                    }
                    for(i_=lsv; i_<=tmpint;i_++)
                    {
                        e[i_] = tmp*e[i_];
                    }
                }
                if( iscale==2 )
                {
                    tmp = anorm/ssfmin;
                    tmpint = lendsv-1;
                    for(i_=lsv; i_<=lendsv;i_++)
                    {
                        d[i_] = tmp*d[i_];
                    }
                    for(i_=lsv; i_<=tmpint;i_++)
                    {
                        e[i_] = tmp*e[i_];
                    }
                }

                //
                // Check for no convergence to an eigenvalue after a total
                // of N*MAXIT iterations.
                //
                if( jtot>=nmaxit )
                {
                    result = false;
                    if( wastranspose )
                    {
                        blas.inplacetranspose(ref z, 1, n, 1, n, ref wtemp);
                    }
                    return result;
                }
            }

            //
            // Order eigenvalues and eigenvectors.
            //
            if( zneeded==0 )
            {

                //
                // Sort (heap sort on D[1..N])
                //
                if( n==1 )
                {
                    return result;
                }
                if( n==2 )
                {
                    if( (double)(d[1])>(double)(d[2]) )
                    {
                        tmp = d[1];
                        d[1] = d[2];
                        d[2] = tmp;
                    }
                    return result;
                }
                i = 2;
                do
                {
                    t = i;
                    while( t!=1 )
                    {
                        k = t/2;
                        if( (double)(d[k])>=(double)(d[t]) )
                        {
                            t = 1;
                        }
                        else
                        {
                            tmp = d[k];
                            d[k] = d[t];
                            d[t] = tmp;
                            t = k;
                        }
                    }
                    i = i+1;
                }
                while( i<=n );
                i = n-1;
                do
                {
                    tmp = d[i+1];
                    d[i+1] = d[1];
                    d[1] = tmp;
                    t = 1;
                    while( t!=0 )
                    {
                        k = 2*t;
                        if( k>i )
                        {
                            t = 0;
                        }
                        else
                        {
                            if( k<i )
                            {
                                // pick the larger of the two children
                                if( (double)(d[k+1])>(double)(d[k]) )
                                {
                                    k = k+1;
                                }
                            }
                            if( (double)(d[t])>=(double)(d[k]) )
                            {
                                t = 0;
                            }
                            else
                            {
                                tmp = d[k];
                                d[k] = d[t];
                                d[t] = tmp;
                                t = k;
                            }
                        }
                    }
                    i = i-1;
                }
                while( i>=1 );
            }
            else
            {

                //
                // Use Selection Sort to minimize swaps of eigenvectors
                //
                for(ii=2; ii<=n; ii++)
                {
                    i = ii-1;
                    k = i;
                    p = d[i];
                    for(j=ii; j<=n; j++)
                    {
                        if( (double)(d[j])<(double)(p) )
                        {
                            k = j;
                            p = d[j];
                        }
                    }
                    if( k!=i )
                    {
                        d[k] = d[i];
                        d[i] = p;
                        if( wastranspose )
                        {
                            // Z is stored transposed: eigenvectors are rows
                            for(i_=1; i_<=n;i_++)
                            {
                                wtemp[i_] = z[i,i_];
                            }
                            for(i_=1; i_<=n;i_++)
                            {
                                z[i,i_] = z[k,i_];
                            }
                            for(i_=1; i_<=n;i_++)
                            {
                                z[k,i_] = wtemp[i_];
                            }
                        }
                        else
                        {
                            for(i_=1; i_<=zrows;i_++)
                            {
                                wtemp[i_] = z[i_,i];
                            }
                            for(i_=1; i_<=zrows;i_++)
                            {
                                z[i_,i] = z[i_,k];
                            }
                            for(i_=1; i_<=zrows;i_++)
                            {
                                z[i_,k] = wtemp[i_];
                            }
                        }
                    }
                }
                if( wastranspose )
                {
                    blas.inplacetranspose(ref z, 1, n, 1, n, ref wtemp);
                }
            }
            return result;
        }


        /*************************************************************************
        DLAE2  computes the eigenvalues of a 2-by-2 symmetric matrix
           [  A   B  ]
           [  B   C  ].
        On return, RT1 is the eigenvalue of larger absolute value, and RT2
        is the eigenvalue of smaller absolute value.

          -- LAPACK auxiliary routine (version 3.0) --
             Univ. of Tennessee, Univ.
of California Berkeley, NAG Ltd.,
             Courant Institute, Argonne National Lab, and Rice University
             October 31, 1992
        *************************************************************************/
        private static void tdevde2(double a, double b, double c, ref double rt1, ref double rt2)
        {
            double trace = a+c;
            double absdiff = Math.Abs(a-c);
            double abstwob = Math.Abs(b+b);
            bool aisbigger = (double)(Math.Abs(a))>(double)(Math.Abs(c));
            double larger = aisbigger ? a : c;
            double smaller = aisbigger ? c : a;
            double root = 0;

            rt1 = 0;
            rt2 = 0;

            //
            // ROOT = sqrt(ABSDIFF^2 + ABSTWOB^2); the larger of the two
            // values is factored out before squaring.
            //
            if( (double)(absdiff)>(double)(abstwob) )
            {
                root = absdiff*Math.Sqrt(1+math.sqr(abstwob/absdiff));
            }
            else if( (double)(absdiff)<(double)(abstwob) )
            {
                root = abstwob*Math.Sqrt(1+math.sqr(absdiff/abstwob));
            }
            else
            {
                // Includes case ABSTWOB=ABSDIFF=0
                root = abstwob*Math.Sqrt(2);
            }

            if( (double)(trace)<(double)(0) )
            {
                rt1 = 0.5*(trace-root);
            }
            else if( (double)(trace)>(double)(0) )
            {
                rt1 = 0.5*(trace+root);
            }
            else
            {
                // Includes case RT1 = RT2 = 0
                rt1 = 0.5*root;
                rt2 = -(0.5*root);
                return;
            }

            //
            // Order of execution important.
            // To get fully accurate smaller eigenvalue,
            // next line needs to be executed in higher precision.
            //
            rt2 = larger/rt1*smaller-b/rt1*b;
        }


        /*************************************************************************
        DLAEV2 computes the eigendecomposition of a 2-by-2 symmetric matrix

           [  A   B  ]
           [  B   C  ].

        On return, RT1 is the eigenvalue of larger absolute value, RT2 is the
        eigenvalue of smaller absolute value, and (CS1,SN1) is the unit right
        eigenvector for RT1, giving the decomposition

           [ CS1  SN1 ] [  A   B  ] [ CS1 -SN1 ]  =  [ RT1  0  ]
           [-SN1  CS1 ] [  B   C  ] [ SN1  CS1 ]     [  0  RT2 ].


          -- LAPACK auxiliary routine (version 3.0) --
             Univ. of Tennessee, Univ.
of California Berkeley, NAG Ltd.,
             Courant Institute, Argonne National Lab, and Rice University
             October 31, 1992
        *************************************************************************/
        private static void tdevdev2(double a, double b, double c, ref double rt1, ref double rt2, ref double cs1, ref double sn1)
        {
            double trace = a+c;
            double diff = a-c;
            double absdiff = Math.Abs(diff);
            double twob = b+b;
            double abstwob = Math.Abs(twob);
            bool aisbigger = (double)(Math.Abs(a))>(double)(Math.Abs(c));
            double larger = aisbigger ? a : c;
            double smaller = aisbigger ? c : a;
            double root = 0;
            double pivot = 0;
            double ratio = 0;
            bool rt1positive = true;
            bool pivotpositive = true;

            rt1 = 0;
            rt2 = 0;
            cs1 = 0;
            sn1 = 0;

            //
            // Compute the eigenvalues
            //
            if( (double)(absdiff)>(double)(abstwob) )
            {
                root = absdiff*Math.Sqrt(1+math.sqr(abstwob/absdiff));
            }
            else if( (double)(absdiff)<(double)(abstwob) )
            {
                root = abstwob*Math.Sqrt(1+math.sqr(absdiff/abstwob));
            }
            else
            {
                // Includes case ABSTWOB=ABSDIFF=0
                root = abstwob*Math.Sqrt(2);
            }
            if( (double)(trace)<(double)(0) )
            {
                rt1 = 0.5*(trace-root);
                rt1positive = false;

                //
                // Order of execution important.
                // To get fully accurate smaller eigenvalue,
                // next line needs to be executed in higher precision.
                //
                rt2 = larger/rt1*smaller-b/rt1*b;
            }
            else if( (double)(trace)>(double)(0) )
            {
                rt1 = 0.5*(trace+root);
                rt1positive = true;

                //
                // Order of execution important.
                // To get fully accurate smaller eigenvalue,
                // next line needs to be executed in higher precision.
                //
                rt2 = larger/rt1*smaller-b/rt1*b;
            }
            else
            {
                // Includes case RT1 = RT2 = 0
                rt1 = 0.5*root;
                rt2 = -(0.5*root);
                rt1positive = true;
            }

            //
            // Compute the eigenvector
            //
            if( (double)(diff)>=(double)(0) )
            {
                pivot = diff+root;
                pivotpositive = true;
            }
            else
            {
                pivot = diff-root;
                pivotpositive = false;
            }
            if( (double)(Math.Abs(pivot))>(double)(abstwob) )
            {
                ratio = -(twob/pivot);
                sn1 = 1/Math.Sqrt(1+ratio*ratio);
                cs1 = ratio*sn1;
            }
            else if( (double)(abstwob)==(double)(0) )
            {
                cs1 = 1;
                sn1 = 0;
            }
            else
            {
                ratio = -(pivot/twob);
                cs1 = 1/Math.Sqrt(1+ratio*ratio);
                sn1 = ratio*cs1;
            }

            // equal signs: (CS1,SN1) := (-SN1,CS1)
            if( rt1positive==pivotpositive )
            {
                ratio = cs1;
                cs1 = -sn1;
                sn1 = ratio;
            }
        }


        /*************************************************************************
        Internal routine: sqrt(A^2+B^2), computed by factoring out the argument
        of larger magnitude before squaring.
        *************************************************************************/
        private static double tdevdpythag(double a, double b)
        {
            double absa = Math.Abs(a);
            double absb = Math.Abs(b);

            if( (double)(absa)<(double)(absb) )
            {
                return absb*Math.Sqrt(1+math.sqr(a/b));
            }
            return absa*Math.Sqrt(1+math.sqr(b/a));
        }


        /*************************************************************************
        Internal routine: returns |A| when B>=0 and -|A| otherwise.
        *************************************************************************/
        private static double tdevdextsign(double a, double b)
        {
            double magnitude = Math.Abs(a);

            return (double)(b)>=(double)(0) ? magnitude : -magnitude;
        }


        private static bool internalbisectioneigenvalues(double[] d, double[] e, int n, int irange, int iorder, double vl, double vu, int il, int iu, double abstol, ref double[] w, ref int m, ref int nsplit, ref int[] iblock, ref int[] isplit, ref int errorcode)
        {
            bool result = new bool();
            double fudge = 0;
            double relfac = 0;
            bool ncnvrg = new bool();
            bool toofew = new bool();
            int ib = 0;
            int ibegin = 0;
            int idiscl = 0;
            int idiscu = 0;
            int ie = 0;
            int iend = 0;
            int iinfo = 0;
            int im = 0;
            int iin = 0;
            int ioff = 0;
            int iout = 0;
            int itmax = 0;
            int iw = 0;
            int iwoff = 0;
            int j = 0;
            int itmp1 = 0;
            int jb = 0;
            int jdisc = 0;
            int je = 0;
            int nwl = 0;
            int nwu = 0;
            double atoli = 0;
            double
bnorm = 0; double gl = 0; double gu = 0; double pivmin = 0; double rtoli = 0; double safemn = 0; double tmp1 = 0; double tmp2 = 0; double tnorm = 0; double ulp = 0; double wkill = 0; double wl = 0; double wlu = 0; double wu = 0; double wul = 0; double scalefactor = 0; double t = 0; int[] idumma = new int[0]; double[] work = new double[0]; int[] iwork = new int[0]; int[] ia1s2 = new int[0]; double[] ra1s2 = new double[0]; double[,] ra1s2x2 = new double[0,0]; int[,] ia1s2x2 = new int[0,0]; double[] ra1siin = new double[0]; double[] ra2siin = new double[0]; double[] ra3siin = new double[0]; double[] ra4siin = new double[0]; double[,] ra1siinx2 = new double[0,0]; int[,] ia1siinx2 = new int[0,0]; int[] iworkspace = new int[0]; double[] rworkspace = new double[0]; int tmpi = 0; d = (double[])d.Clone(); e = (double[])e.Clone(); w = new double[0]; m = 0; nsplit = 0; iblock = new int[0]; isplit = new int[0]; errorcode = 0; // // Quick return if possible // m = 0; if( n==0 ) { result = true; return result; } // // Get machine constants // NB is the minimum vector length for vector bisection, or 0 // if only scalar is to be done. 
// fudge = 2; relfac = 2; safemn = math.minrealnumber; ulp = 2*math.machineepsilon; rtoli = ulp*relfac; idumma = new int[1+1]; work = new double[4*n+1]; iwork = new int[3*n+1]; w = new double[n+1]; iblock = new int[n+1]; isplit = new int[n+1]; ia1s2 = new int[2+1]; ra1s2 = new double[2+1]; ra1s2x2 = new double[2+1, 2+1]; ia1s2x2 = new int[2+1, 2+1]; ra1siin = new double[n+1]; ra2siin = new double[n+1]; ra3siin = new double[n+1]; ra4siin = new double[n+1]; ra1siinx2 = new double[n+1, 2+1]; ia1siinx2 = new int[n+1, 2+1]; iworkspace = new int[n+1]; rworkspace = new double[n+1]; // // these initializers are not really necessary, // but without them compiler complains about uninitialized locals // wlu = 0; wul = 0; // // Check for Errors // result = false; errorcode = 0; if( irange<=0 || irange>=4 ) { errorcode = -4; } if( iorder<=0 || iorder>=3 ) { errorcode = -5; } if( n<0 ) { errorcode = -3; } if( irange==2 && (double)(vl)>=(double)(vu) ) { errorcode = -6; } if( irange==3 && (il<1 || il>Math.Max(1, n)) ) { errorcode = -8; } if( irange==3 && (iun) ) { errorcode = -9; } if( errorcode!=0 ) { return result; } // // Initialize error flags // ncnvrg = false; toofew = false; // // Simplifications: // if( (irange==3 && il==1) && iu==n ) { irange = 1; } // // Special Case when N=1 // if( n==1 ) { nsplit = 1; isplit[1] = 1; if( irange==2 && ((double)(vl)>=(double)(d[1]) || (double)(vu)<(double)(d[1])) ) { m = 0; } else { w[1] = d[1]; iblock[1] = 1; m = 1; } result = true; return result; } // // Scaling // t = Math.Abs(d[n]); for(j=1; j<=n-1; j++) { t = Math.Max(t, Math.Abs(d[j])); t = Math.Max(t, Math.Abs(e[j])); } scalefactor = 1; if( (double)(t)!=(double)(0) ) { if( (double)(t)>(double)(Math.Sqrt(Math.Sqrt(math.minrealnumber))*Math.Sqrt(math.maxrealnumber)) ) { scalefactor = t; } if( (double)(t)<(double)(Math.Sqrt(Math.Sqrt(math.maxrealnumber))*Math.Sqrt(math.minrealnumber)) ) { scalefactor = t; } for(j=1; j<=n-1; j++) { d[j] = d[j]/scalefactor; e[j] = e[j]/scalefactor; } 
d[n] = d[n]/scalefactor; } // // Compute Splitting Points // nsplit = 1; work[n] = 0; pivmin = 1; for(j=2; j<=n; j++) { tmp1 = math.sqr(e[j-1]); if( (double)(Math.Abs(d[j]*d[j-1])*math.sqr(ulp)+safemn)>(double)(tmp1) ) { isplit[nsplit] = j-1; nsplit = nsplit+1; work[j-1] = 0; } else { work[j-1] = tmp1; pivmin = Math.Max(pivmin, tmp1); } } isplit[nsplit] = n; pivmin = pivmin*safemn; // // Compute Interval and ATOLI // if( irange==3 ) { // // RANGE='I': Compute the interval containing eigenvalues // IL through IU. // // Compute Gershgorin interval for entire (split) matrix // and use it as the initial interval // gu = d[1]; gl = d[1]; tmp1 = 0; for(j=1; j<=n-1; j++) { tmp2 = Math.Sqrt(work[j]); gu = Math.Max(gu, d[j]+tmp1+tmp2); gl = Math.Min(gl, d[j]-tmp1-tmp2); tmp1 = tmp2; } gu = Math.Max(gu, d[n]+tmp1); gl = Math.Min(gl, d[n]-tmp1); tnorm = Math.Max(Math.Abs(gl), Math.Abs(gu)); gl = gl-fudge*tnorm*ulp*n-fudge*2*pivmin; gu = gu+fudge*tnorm*ulp*n+fudge*pivmin; // // Compute Iteration parameters // itmax = (int)Math.Ceiling((Math.Log(tnorm+pivmin)-Math.Log(pivmin))/Math.Log(2))+2; if( (double)(abstol)<=(double)(0) ) { atoli = ulp*tnorm; } else { atoli = abstol; } work[n+1] = gl; work[n+2] = gl; work[n+3] = gu; work[n+4] = gu; work[n+5] = gl; work[n+6] = gu; iwork[1] = -1; iwork[2] = -1; iwork[3] = n+1; iwork[4] = n+1; iwork[5] = il-1; iwork[6] = iu; // // Calling DLAEBZ // // DLAEBZ( 3, ITMAX, N, 2, 2, NB, ATOLI, RTOLI, PIVMIN, D, E, // WORK, IWORK( 5 ), WORK( N+1 ), WORK( N+5 ), IOUT, // IWORK, W, IBLOCK, IINFO ) // ia1s2[1] = iwork[5]; ia1s2[2] = iwork[6]; ra1s2[1] = work[n+5]; ra1s2[2] = work[n+6]; ra1s2x2[1,1] = work[n+1]; ra1s2x2[2,1] = work[n+2]; ra1s2x2[1,2] = work[n+3]; ra1s2x2[2,2] = work[n+4]; ia1s2x2[1,1] = iwork[1]; ia1s2x2[2,1] = iwork[2]; ia1s2x2[1,2] = iwork[3]; ia1s2x2[2,2] = iwork[4]; internaldlaebz(3, itmax, n, 2, 2, atoli, rtoli, pivmin, d, e, work, ref ia1s2, ref ra1s2x2, ref ra1s2, ref iout, ref ia1s2x2, ref w, ref iblock, ref iinfo); iwork[5] = 
ia1s2[1]; iwork[6] = ia1s2[2]; work[n+5] = ra1s2[1]; work[n+6] = ra1s2[2]; work[n+1] = ra1s2x2[1,1]; work[n+2] = ra1s2x2[2,1]; work[n+3] = ra1s2x2[1,2]; work[n+4] = ra1s2x2[2,2]; iwork[1] = ia1s2x2[1,1]; iwork[2] = ia1s2x2[2,1]; iwork[3] = ia1s2x2[1,2]; iwork[4] = ia1s2x2[2,2]; if( iwork[6]==iu ) { wl = work[n+1]; wlu = work[n+3]; nwl = iwork[1]; wu = work[n+4]; wul = work[n+2]; nwu = iwork[4]; } else { wl = work[n+2]; wlu = work[n+4]; nwl = iwork[2]; wu = work[n+3]; wul = work[n+1]; nwu = iwork[3]; } if( ((nwl<0 || nwl>=n) || nwu<1) || nwu>n ) { errorcode = 4; result = false; return result; } } else { // // RANGE='A' or 'V' -- Set ATOLI // tnorm = Math.Max(Math.Abs(d[1])+Math.Abs(e[1]), Math.Abs(d[n])+Math.Abs(e[n-1])); for(j=2; j<=n-1; j++) { tnorm = Math.Max(tnorm, Math.Abs(d[j])+Math.Abs(e[j-1])+Math.Abs(e[j])); } if( (double)(abstol)<=(double)(0) ) { atoli = ulp*tnorm; } else { atoli = abstol; } if( irange==2 ) { wl = vl; wu = vu; } else { wl = 0; wu = 0; } } // // Find Eigenvalues -- Loop Over Blocks and recompute NWL and NWU. // NWL accumulates the number of eigenvalues .le. WL, // NWU accumulates the number of eigenvalues .le. 
WU // m = 0; iend = 0; errorcode = 0; nwl = 0; nwu = 0; for(jb=1; jb<=nsplit; jb++) { ioff = iend; ibegin = ioff+1; iend = isplit[jb]; iin = iend-ioff; if( iin==1 ) { // // Special Case -- IIN=1 // if( irange==1 || (double)(wl)>=(double)(d[ibegin]-pivmin) ) { nwl = nwl+1; } if( irange==1 || (double)(wu)>=(double)(d[ibegin]-pivmin) ) { nwu = nwu+1; } if( irange==1 || ((double)(wl)<(double)(d[ibegin]-pivmin) && (double)(wu)>=(double)(d[ibegin]-pivmin)) ) { m = m+1; w[m] = d[ibegin]; iblock[m] = jb; } } else { // // General Case -- IIN > 1 // // Compute Gershgorin Interval // and use it as the initial interval // gu = d[ibegin]; gl = d[ibegin]; tmp1 = 0; for(j=ibegin; j<=iend-1; j++) { tmp2 = Math.Abs(e[j]); gu = Math.Max(gu, d[j]+tmp1+tmp2); gl = Math.Min(gl, d[j]-tmp1-tmp2); tmp1 = tmp2; } gu = Math.Max(gu, d[iend]+tmp1); gl = Math.Min(gl, d[iend]-tmp1); bnorm = Math.Max(Math.Abs(gl), Math.Abs(gu)); gl = gl-fudge*bnorm*ulp*iin-fudge*pivmin; gu = gu+fudge*bnorm*ulp*iin+fudge*pivmin; // // Compute ATOLI for the current submatrix // if( (double)(abstol)<=(double)(0) ) { atoli = ulp*Math.Max(Math.Abs(gl), Math.Abs(gu)); } else { atoli = abstol; } if( irange>1 ) { if( (double)(gu)<(double)(wl) ) { nwl = nwl+iin; nwu = nwu+iin; continue; } gl = Math.Max(gl, wl); gu = Math.Min(gu, wu); if( (double)(gl)>=(double)(gu) ) { continue; } } // // Set Up Initial Interval // work[n+1] = gl; work[n+iin+1] = gu; // // Calling DLAEBZ // // CALL DLAEBZ( 1, 0, IN, IN, 1, NB, ATOLI, RTOLI, PIVMIN, // D( IBEGIN ), E( IBEGIN ), WORK( IBEGIN ), // IDUMMA, WORK( N+1 ), WORK( N+2*IN+1 ), IM, // IWORK, W( M+1 ), IBLOCK( M+1 ), IINFO ) // for(tmpi=1; tmpi<=iin; tmpi++) { ra1siin[tmpi] = d[ibegin-1+tmpi]; if( ibegin-1+tmpiiout-iinfo ) { ncnvrg = true; ib = -jb; } else { ib = jb; } for(je=iwork[j]+1+iwoff; je<=iwork[j+iin]+iwoff; je++) { w[je] = tmp1; iblock[je] = ib; } } m = m+im; } } // // If RANGE='I', then (WL,WU) contains eigenvalues NWL+1,...,NWU // If NWL+1 < IL or NWU > IU, discard extra 
eigenvalues. // if( irange==3 ) { im = 0; idiscl = il-1-nwl; idiscu = nwu-iu; if( idiscl>0 || idiscu>0 ) { for(je=1; je<=m; je++) { if( (double)(w[je])<=(double)(wlu) && idiscl>0 ) { idiscl = idiscl-1; } else { if( (double)(w[je])>=(double)(wul) && idiscu>0 ) { idiscu = idiscu-1; } else { im = im+1; w[im] = w[je]; iblock[im] = iblock[je]; } } } m = im; } if( idiscl>0 || idiscu>0 ) { // // Code to deal with effects of bad arithmetic: // Some low eigenvalues to be discarded are not in (WL,WLU], // or high eigenvalues to be discarded are not in (WUL,WU] // so just kill off the smallest IDISCL/largest IDISCU // eigenvalues, by simply finding the smallest/largest // eigenvalue(s). // // (If N(w) is monotone non-decreasing, this should never // happen.) // if( idiscl>0 ) { wkill = wu; for(jdisc=1; jdisc<=idiscl; jdisc++) { iw = 0; for(je=1; je<=m; je++) { if( iblock[je]!=0 && ((double)(w[je])<(double)(wkill) || iw==0) ) { iw = je; wkill = w[je]; } } iblock[iw] = 0; } } if( idiscu>0 ) { wkill = wl; for(jdisc=1; jdisc<=idiscu; jdisc++) { iw = 0; for(je=1; je<=m; je++) { if( iblock[je]!=0 && ((double)(w[je])>(double)(wkill) || iw==0) ) { iw = je; wkill = w[je]; } } iblock[iw] = 0; } } im = 0; for(je=1; je<=m; je++) { if( iblock[je]!=0 ) { im = im+1; w[im] = w[je]; iblock[im] = iblock[je]; } } m = im; } if( idiscl<0 || idiscu<0 ) { toofew = true; } } // // If ORDER='B', do nothing -- the eigenvalues are already sorted // by block. 
// If ORDER='E', sort the eigenvalues from smallest to largest // if( iorder==1 && nsplit>1 ) { for(je=1; je<=m-1; je++) { ie = 0; tmp1 = w[je]; for(j=je+1; j<=m; j++) { if( (double)(w[j])<(double)(tmp1) ) { ie = j; tmp1 = w[j]; } } if( ie!=0 ) { itmp1 = iblock[ie]; w[ie] = w[je]; iblock[ie] = iblock[je]; w[je] = tmp1; iblock[je] = itmp1; } } } for(j=1; j<=m; j++) { w[j] = w[j]*scalefactor; } errorcode = 0; if( ncnvrg ) { errorcode = errorcode+1; } if( toofew ) { errorcode = errorcode+2; } result = errorcode==0; return result; } private static void internaldstein(int n, double[] d, double[] e, int m, double[] w, int[] iblock, int[] isplit, ref double[,] z, ref int[] ifail, ref int info) { int maxits = 0; int extra = 0; int b1 = 0; int blksiz = 0; int bn = 0; int gpind = 0; int i = 0; int iinfo = 0; int its = 0; int j = 0; int j1 = 0; int jblk = 0; int jmax = 0; int nblk = 0; int nrmchk = 0; double dtpcrt = 0; double eps = 0; double eps1 = 0; double nrm = 0; double onenrm = 0; double ortol = 0; double pertol = 0; double scl = 0; double sep = 0; double tol = 0; double xj = 0; double xjm = 0; double ztr = 0; double[] work1 = new double[0]; double[] work2 = new double[0]; double[] work3 = new double[0]; double[] work4 = new double[0]; double[] work5 = new double[0]; int[] iwork = new int[0]; bool tmpcriterion = new bool(); int ti = 0; int i1 = 0; int i2 = 0; double v = 0; int i_ = 0; int i1_ = 0; e = (double[])e.Clone(); w = (double[])w.Clone(); z = new double[0,0]; ifail = new int[0]; info = 0; maxits = 5; extra = 2; work1 = new double[Math.Max(n, 1)+1]; work2 = new double[Math.Max(n-1, 1)+1]; work3 = new double[Math.Max(n, 1)+1]; work4 = new double[Math.Max(n, 1)+1]; work5 = new double[Math.Max(n, 1)+1]; iwork = new int[Math.Max(n, 1)+1]; ifail = new int[Math.Max(m, 1)+1]; z = new double[Math.Max(n, 1)+1, Math.Max(m, 1)+1]; // // these initializers are not really necessary, // but without them compiler complains about uninitialized locals // gpind = 0; onenrm = 0; 
ortol = 0; dtpcrt = 0; xjm = 0; // // Test the input parameters. // info = 0; for(i=1; i<=m; i++) { ifail[i] = 0; } if( n<0 ) { info = -1; return; } if( m<0 || m>n ) { info = -4; return; } for(j=2; j<=m; j++) { if( iblock[j]1 ) { eps1 = Math.Abs(eps*xj); pertol = 10*eps1; sep = xj-xjm; if( (double)(sep)<(double)(pertol) ) { xj = xjm+pertol; } } its = 0; nrmchk = 0; // // Get random starting vector. // for(ti=1; ti<=blksiz; ti++) { work1[ti] = 2*math.randomreal()-1; } // // Copy the matrix T so it won't be destroyed in factorization. // for(ti=1; ti<=blksiz-1; ti++) { work2[ti] = e[b1+ti-1]; work3[ti] = e[b1+ti-1]; work4[ti] = d[b1+ti-1]; } work4[blksiz] = d[b1+blksiz-1]; // // Compute LU factors with partial pivoting ( PT = LU ) // tol = 0; tdininternaldlagtf(blksiz, ref work4, xj, ref work2, ref work3, tol, ref work5, ref iwork, ref iinfo); // // Update iteration count. // do { its = its+1; if( its>maxits ) { // // If stopping criterion was not satisfied, update info and // store eigenvector number in array ifail. // info = info+1; ifail[info] = j; break; } // // Normalize and scale the righthand side vector Pb. // v = 0; for(ti=1; ti<=blksiz; ti++) { v = v+Math.Abs(work1[ti]); } scl = blksiz*onenrm*Math.Max(eps, Math.Abs(work4[blksiz]))/v; for(i_=1; i_<=blksiz;i_++) { work1[i_] = scl*work1[i_]; } // // Solve the system LU = Pb. // tdininternaldlagts(blksiz, work4, work2, work3, work5, iwork, ref work1, ref tol, ref iinfo); // // Reorthogonalize by modified Gram-Schmidt if eigenvalues are // close enough. // if( jblk!=1 ) { if( (double)(Math.Abs(xj-xjm))>(double)(ortol) ) { gpind = j; } if( gpind!=j ) { for(i=gpind; i<=j-1; i++) { i1 = b1; i2 = b1+blksiz-1; i1_ = (i1)-(1); ztr = 0.0; for(i_=1; i_<=blksiz;i_++) { ztr += work1[i_]*z[i_+i1_,i]; } i1_ = (i1) - (1); for(i_=1; i_<=blksiz;i_++) { work1[i_] = work1[i_] - ztr*z[i_+i1_,i]; } apserv.touchint(ref i2); } } } // // Check the infinity norm of the iterate. 
// jmax = blas.vectoridxabsmax(work1, 1, blksiz); nrm = Math.Abs(work1[jmax]); // // Continue for additional iterations after norm reaches // stopping criterion. // tmpcriterion = false; if( (double)(nrm)<(double)(dtpcrt) ) { tmpcriterion = true; } else { nrmchk = nrmchk+1; if( nrmchk1 ) { tol = Math.Max(tol, Math.Max(Math.Abs(a[2]), Math.Abs(b[1]))); } for(k=3; k<=n; k++) { tol = Math.Max(tol, Math.Max(Math.Abs(a[k]), Math.Max(Math.Abs(b[k-1]), Math.Abs(d[k-2])))); } tol = tol*eps; if( (double)(tol)==(double)(0) ) { tol = eps; } } for(k=2; k<=n; k++) { if( iin[k-1]==0 ) { y[k] = y[k]-c[k-1]*y[k-1]; } else { temp = y[k-1]; y[k-1] = y[k]; y[k] = temp-c[k-1]*y[k]; } } for(k=n; k>=1; k--) { if( k<=n-2 ) { temp = y[k]-b[k]*y[k+1]-d[k]*y[k+2]; } else { if( k==n-1 ) { temp = y[k]-b[k]*y[k+1]; } else { temp = y[k]; } } ak = a[k]; pert = Math.Abs(tol); if( (double)(ak)<(double)(0) ) { pert = -pert; } while( true ) { absak = Math.Abs(ak); if( (double)(absak)<(double)(1) ) { if( (double)(absak)<(double)(sfmin) ) { if( (double)(absak)==(double)(0) || (double)(Math.Abs(temp)*sfmin)>(double)(absak) ) { ak = ak+pert; pert = 2*pert; continue; } else { temp = temp*bignum; ak = ak*bignum; } } else { if( (double)(Math.Abs(temp))>(double)(absak*bignum) ) { ak = ak+pert; pert = 2*pert; continue; } } } break; } y[k] = temp/ak; } } private static void internaldlaebz(int ijob, int nitmax, int n, int mmax, int minp, double abstol, double reltol, double pivmin, double[] d, double[] e, double[] e2, ref int[] nval, ref double[,] ab, ref double[] c, ref int mout, ref int[,] nab, ref double[] work, ref int[] iwork, ref int info) { int itmp1 = 0; int itmp2 = 0; int j = 0; int ji = 0; int jit = 0; int jp = 0; int kf = 0; int kfnew = 0; int kl = 0; int klnew = 0; double tmp1 = 0; double tmp2 = 0; mout = 0; info = 0; info = 0; if( ijob<1 || ijob>3 ) { info = -1; return; } // // Initialize NAB // if( ijob==1 ) { // // Compute the number of eigenvalues in the initial intervals. 
// mout = 0; // //DIR$ NOVECTOR // for(ji=1; ji<=minp; ji++) { for(jp=1; jp<=2; jp++) { tmp1 = d[1]-ab[ji,jp]; if( (double)(Math.Abs(tmp1))<(double)(pivmin) ) { tmp1 = -pivmin; } nab[ji,jp] = 0; if( (double)(tmp1)<=(double)(0) ) { nab[ji,jp] = 1; } for(j=2; j<=n; j++) { tmp1 = d[j]-e2[j-1]/tmp1-ab[ji,jp]; if( (double)(Math.Abs(tmp1))<(double)(pivmin) ) { tmp1 = -pivmin; } if( (double)(tmp1)<=(double)(0) ) { nab[ji,jp] = nab[ji,jp]+1; } } } mout = mout+nab[ji,2]-nab[ji,1]; } return; } // // Initialize for loop // // KF and KL have the following meaning: // Intervals 1,...,KF-1 have converged. // Intervals KF,...,KL still need to be refined. // kf = 1; kl = minp; // // If IJOB=2, initialize C. // If IJOB=3, use the user-supplied starting point. // if( ijob==2 ) { for(ji=1; ji<=minp; ji++) { c[ji] = 0.5*(ab[ji,1]+ab[ji,2]); } } // // Iteration loop // for(jit=1; jit<=nitmax; jit++) { // // Loop over intervals // // // Serial Version of the loop // klnew = kl; for(ji=kf; ji<=kl; ji++) { // // Compute N(w), the number of eigenvalues less than w // tmp1 = c[ji]; tmp2 = d[1]-tmp1; itmp1 = 0; if( (double)(tmp2)<=(double)(pivmin) ) { itmp1 = 1; tmp2 = Math.Min(tmp2, -pivmin); } // // A series of compiler directives to defeat vectorization // for the next loop // //*$PL$ CMCHAR=' ' //CDIR$ NEXTSCALAR //C$DIR SCALAR //CDIR$ NEXT SCALAR //CVD$L NOVECTOR //CDEC$ NOVECTOR //CVD$ NOVECTOR //*VDIR NOVECTOR //*VOCL LOOP,SCALAR //CIBM PREFER SCALAR //*$PL$ CMCHAR='*' // for(j=2; j<=n; j++) { tmp2 = d[j]-e2[j-1]/tmp2-tmp1; if( (double)(tmp2)<=(double)(pivmin) ) { itmp1 = itmp1+1; tmp2 = Math.Min(tmp2, -pivmin); } } if( ijob<=2 ) { // // IJOB=2: Choose all intervals containing eigenvalues. // // Insure that N(w) is monotone // itmp1 = Math.Min(nab[ji,2], Math.Max(nab[ji,1], itmp1)); // // Update the Queue -- add intervals if both halves // contain eigenvalues. // if( itmp1==nab[ji,2] ) { // // No eigenvalue in the upper interval: // just use the lower interval. 
// ab[ji,2] = tmp1; } else { if( itmp1==nab[ji,1] ) { // // No eigenvalue in the lower interval: // just use the upper interval. // ab[ji,1] = tmp1; } else { if( klnew=nval[ji] ) { ab[ji,2] = tmp1; nab[ji,2] = itmp1; } } } kl = klnew; // // Check for convergence // kfnew = kf; for(ji=kf; ji<=kl; ji++) { tmp1 = Math.Abs(ab[ji,2]-ab[ji,1]); tmp2 = Math.Max(Math.Abs(ab[ji,2]), Math.Abs(ab[ji,1])); if( (double)(tmp1)<(double)(Math.Max(abstol, Math.Max(pivmin, reltol*tmp2))) || nab[ji,1]>=nab[ji,2] ) { // // Converged -- Swap with position KFNEW, // then increment KFNEW // if( ji>kfnew ) { tmp1 = ab[ji,1]; tmp2 = ab[ji,2]; itmp1 = nab[ji,1]; itmp2 = nab[ji,2]; ab[ji,1] = ab[kfnew,1]; ab[ji,2] = ab[kfnew,2]; nab[ji,1] = nab[kfnew,1]; nab[ji,2] = nab[kfnew,2]; ab[kfnew,1] = tmp1; ab[kfnew,2] = tmp2; nab[kfnew,1] = itmp1; nab[kfnew,2] = itmp2; if( ijob==3 ) { itmp1 = nval[ji]; nval[ji] = nval[kfnew]; nval[kfnew] = itmp1; } } kfnew = kfnew+1; } } kf = kfnew; // // Choose Midpoints // for(ji=kf; ji<=kl; ji++) { c[ji] = 0.5*(ab[ji,1]+ab[ji,2]); } // // If no more intervals to refine, quit. // if( kf>kl ) { break; } } // // Converged // info = Math.Max(kl+1-kf, 0); mout = kl; } /************************************************************************* Internal subroutine -- LAPACK routine (version 3.0) -- Univ. of Tennessee, Univ. 
of California Berkeley, NAG Ltd., Courant Institute, Argonne National Lab,
             and Rice University
             June 30, 1999
        *************************************************************************/
        //
        // Zero-based front end for internaltrevc, which indexes its matrices
        // from 1.
        //
        // t        - input matrix; elements [0..n-1, 0..n-1] are read
        // n        - matrix size
        // side     - 1: right eigenvectors, 2: left eigenvectors, 3: both
        // howmny   - 1: use the matrices supplied in vl/vr as input ("over"),
        //            2: all eigenvectors, 3: those selected by vselect
        // vselect  - selection flags, read only when howmny==3
        // vl, vr   - left/right eigenvector matrices, zero-based; resized to at
        //            least n-by-n when they are requested
        // m        - set by the MKL kernel or by internaltrevc
        // info     - 0 on entry; internaltrevc sets a negative code for invalid
        //            n, side or howmny
        //
        private static void rmatrixinternaltrevc(double[,] t,
            int n,
            int side,
            int howmny,
            bool[] vselect,
            ref double[,] vl,
            ref double[,] vr,
            ref int m,
            ref int info)
        {
            int i = 0;
            int j = 0;
            double[,] t1 = new double[0,0];
            double[,] vl1 = new double[0,0];
            double[,] vr1 = new double[0,0];
            bool[] vselect1 = new bool[0];
            bool wantright = side==1 || side==3;
            bool wantleft = side==2 || side==3;

            vselect = (bool[])vselect.Clone();
            m = 0;
            info = 0;

            //
            // Allocate VL/VR, if needed
            //
            if( howmny==2 || howmny==3 )
            {
                if( wantright )
                {
                    apserv.rmatrixsetlengthatleast(ref vr, n, n);
                }
                if( wantleft )
                {
                    apserv.rmatrixsetlengthatleast(ref vl, n, n);
                }
            }

            //
            // Try to use MKL kernel
            //
            if( ablasmkl.rmatrixinternaltrevcmkl(t, n, side, howmny, vl, vr, ref m, ref info) )
            {
                return;
            }

            //
            // ALGLIB version: shift every input into one-based storage
            //
            t1 = new double[n+1, n+1];
            for(i=0; i<=n-1; i++)
            {
                for(j=0; j<=n-1; j++)
                {
                    t1[i+1,j+1] = t[i,j];
                }
            }
            if( howmny==3 )
            {
                vselect1 = new bool[n+1];
                for(i=0; i<=n-1; i++)
                {
                    vselect1[1+i] = vselect[i];
                }
            }
            if( wantleft && howmny==1 )
            {
                vl1 = new double[n+1, n+1];
                for(i=0; i<=n-1; i++)
                {
                    for(j=0; j<=n-1; j++)
                    {
                        vl1[i+1,j+1] = vl[i,j];
                    }
                }
            }
            if( wantright && howmny==1 )
            {
                vr1 = new double[n+1, n+1];
                for(i=0; i<=n-1; i++)
                {
                    for(j=0; j<=n-1; j++)
                    {
                        vr1[i+1,j+1] = vr[i,j];
                    }
                }
            }
            internaltrevc(t1, n, side, howmny, vselect1, ref vl1, ref vr1, ref m, ref info);

            //
            // Copy results back to zero-based storage.
            //
            // Only the matrices that were actually requested are copied. For a
            // SIDE value outside 1..3 internaltrevc reports the error through
            // INFO without allocating VL1/VR1, so an unconditional copy would
            // index the empty placeholders.
            //
            if( wantleft )
            {
                apserv.rmatrixsetlengthatleast(ref vl, n, n);
                for(i=0; i<=n-1; i++)
                {
                    for(j=0; j<=n-1; j++)
                    {
                        vl[i,j] = vl1[i+1,j+1];
                    }
                }
            }
            if( wantright )
            {
                apserv.rmatrixsetlengthatleast(ref vr, n, n);
                for(i=0; i<=n-1; i++)
                {
                    for(j=0; j<=n-1; j++)
                    {
                        vr[i,j] = vr1[i+1,j+1];
                    }
                }
            }
        }


        /*************************************************************************
        Internal subroutine

          -- LAPACK routine (version 3.0) --
             Univ. of Tennessee, Univ. 
of California Berkeley, NAG Ltd., Courant Institute, Argonne National Lab, and Rice University June 30, 1999 *************************************************************************/ private static void internaltrevc(double[,] t, int n, int side, int howmny, bool[] vselect, ref double[,] vl, ref double[,] vr, ref int m, ref int info) { bool allv = new bool(); bool bothv = new bool(); bool leftv = new bool(); bool over = new bool(); bool pair = new bool(); bool rightv = new bool(); bool somev = new bool(); int i = 0; int ierr = 0; int ii = 0; int ip = 0; int iis = 0; int j = 0; int j1 = 0; int j2 = 0; int jnxt = 0; int k = 0; int ki = 0; int n2 = 0; double beta = 0; double bignum = 0; double emax = 0; double rec = 0; double remax = 0; double scl = 0; double smin = 0; double smlnum = 0; double ulp = 0; double unfl = 0; double vcrit = 0; double vmax = 0; double wi = 0; double wr = 0; double xnorm = 0; double[,] x = new double[0,0]; double[] work = new double[0]; double[] temp = new double[0]; double[,] temp11 = new double[0,0]; double[,] temp22 = new double[0,0]; double[,] temp11b = new double[0,0]; double[,] temp21b = new double[0,0]; double[,] temp12b = new double[0,0]; double[,] temp22b = new double[0,0]; bool skipflag = new bool(); int k1 = 0; int k2 = 0; int k3 = 0; int k4 = 0; double vt = 0; bool[] rswap4 = new bool[0]; bool[] zswap4 = new bool[0]; int[,] ipivot44 = new int[0,0]; double[] civ4 = new double[0]; double[] crv4 = new double[0]; int i_ = 0; int i1_ = 0; vselect = (bool[])vselect.Clone(); m = 0; info = 0; x = new double[2+1, 2+1]; temp11 = new double[1+1, 1+1]; temp11b = new double[1+1, 1+1]; temp21b = new double[2+1, 1+1]; temp12b = new double[1+1, 2+1]; temp22b = new double[2+1, 2+1]; temp22 = new double[2+1, 2+1]; work = new double[3*n+1]; temp = new double[n+1]; rswap4 = new bool[4+1]; zswap4 = new bool[4+1]; ipivot44 = new int[4+1, 4+1]; civ4 = new double[4+1]; crv4 = new double[4+1]; if( howmny!=1 ) { if( side==1 || side==3 ) { vr = new 
double[n+1, n+1]; } if( side==2 || side==3 ) { vl = new double[n+1, n+1]; } } // // Decode and test the input parameters // bothv = side==3; rightv = side==1 || bothv; leftv = side==2 || bothv; allv = howmny==2; over = howmny==1; somev = howmny==3; info = 0; if( n<0 ) { info = -2; return; } if( !rightv && !leftv ) { info = -3; return; } if( (!allv && !over) && !somev ) { info = -4; return; } // // Set M to the number of columns required to store the selected // eigenvectors, standardize the array SELECT if necessary, and // test MM. // if( somev ) { m = 0; pair = false; for(j=1; j<=n; j++) { if( pair ) { pair = false; vselect[j] = false; } else { if( j=1; ki--) { skipflag = false; if( ip==1 ) { skipflag = true; } else { if( ki!=1 ) { if( (double)(t[ki,ki-1])!=(double)(0) ) { ip = -1; } } if( somev ) { if( ip==0 ) { if( !vselect[ki] ) { skipflag = true; } } else { if( !vselect[ki-1] ) { skipflag = true; } } } } if( !skipflag ) { // // Compute the KI-th eigenvalue (WR,WI). // wr = t[ki,ki]; wi = 0; if( ip!=0 ) { wi = Math.Sqrt(Math.Abs(t[ki,ki-1]))*Math.Sqrt(Math.Abs(t[ki-1,ki])); } smin = Math.Max(ulp*(Math.Abs(wr)+Math.Abs(wi)), smlnum); if( ip==0 ) { // // Real right eigenvector // work[ki+n] = 1; // // Form right-hand side // for(k=1; k<=ki-1; k++) { work[k+n] = -t[k,ki]; } // // Solve the upper quasi-triangular system: // (T(1:KI-1,1:KI-1) - WR)*X = SCALE*WORK. // jnxt = ki-1; for(j=ki-1; j>=1; j--) { if( j>jnxt ) { continue; } j1 = j; j2 = j; jnxt = j-1; if( j>1 ) { if( (double)(t[j,j-1])!=(double)(0) ) { j1 = j-1; jnxt = j-2; } } if( j1==j2 ) { // // 1-by-1 diagonal block // temp11[1,1] = t[j,j]; temp11b[1,1] = work[j+n]; internalhsevdlaln2(false, 1, 1, smin, 1, temp11, 1.0, 1.0, temp11b, wr, 0.0, ref rswap4, ref zswap4, ref ipivot44, ref civ4, ref crv4, ref x, ref scl, ref xnorm, ref ierr); // // Scale X(1,1) to avoid overflow when updating // the right-hand side. 
// if( (double)(xnorm)>(double)(1) ) { if( (double)(work[j])>(double)(bignum/xnorm) ) { x[1,1] = x[1,1]/xnorm; scl = scl/xnorm; } } // // Scale if necessary // if( (double)(scl)!=(double)(1) ) { k1 = n+1; k2 = n+ki; for(i_=k1; i_<=k2;i_++) { work[i_] = scl*work[i_]; } } work[j+n] = x[1,1]; // // Update right-hand side // k1 = 1+n; k2 = j-1+n; k3 = j-1; vt = -x[1,1]; i1_ = (1) - (k1); for(i_=k1; i_<=k2;i_++) { work[i_] = work[i_] + vt*t[i_+i1_,j]; } } else { // // 2-by-2 diagonal block // temp22[1,1] = t[j-1,j-1]; temp22[1,2] = t[j-1,j]; temp22[2,1] = t[j,j-1]; temp22[2,2] = t[j,j]; temp21b[1,1] = work[j-1+n]; temp21b[2,1] = work[j+n]; internalhsevdlaln2(false, 2, 1, smin, 1.0, temp22, 1.0, 1.0, temp21b, wr, 0, ref rswap4, ref zswap4, ref ipivot44, ref civ4, ref crv4, ref x, ref scl, ref xnorm, ref ierr); // // Scale X(1,1) and X(2,1) to avoid overflow when // updating the right-hand side. // if( (double)(xnorm)>(double)(1) ) { beta = Math.Max(work[j-1], work[j]); if( (double)(beta)>(double)(bignum/xnorm) ) { x[1,1] = x[1,1]/xnorm; x[2,1] = x[2,1]/xnorm; scl = scl/xnorm; } } // // Scale if necessary // if( (double)(scl)!=(double)(1) ) { k1 = 1+n; k2 = ki+n; for(i_=k1; i_<=k2;i_++) { work[i_] = scl*work[i_]; } } work[j-1+n] = x[1,1]; work[j+n] = x[2,1]; // // Update right-hand side // k1 = 1+n; k2 = j-2+n; k3 = j-2; k4 = j-1; vt = -x[1,1]; i1_ = (1) - (k1); for(i_=k1; i_<=k2;i_++) { work[i_] = work[i_] + vt*t[i_+i1_,k4]; } vt = -x[2,1]; i1_ = (1) - (k1); for(i_=k1; i_<=k2;i_++) { work[i_] = work[i_] + vt*t[i_+i1_,j]; } } } // // Copy the vector x or Q*x to VR and normalize. 
// if( !over ) { k1 = 1+n; k2 = ki+n; i1_ = (k1) - (1); for(i_=1; i_<=ki;i_++) { vr[i_,iis] = work[i_+i1_]; } ii = blas.columnidxabsmax(vr, 1, ki, iis); remax = 1/Math.Abs(vr[ii,iis]); for(i_=1; i_<=ki;i_++) { vr[i_,iis] = remax*vr[i_,iis]; } for(k=ki+1; k<=n; k++) { vr[k,iis] = 0; } } else { if( ki>1 ) { for(i_=1; i_<=n;i_++) { temp[i_] = vr[i_,ki]; } blas.matrixvectormultiply(vr, 1, n, 1, ki-1, false, work, 1+n, ki-1+n, 1.0, ref temp, 1, n, work[ki+n]); for(i_=1; i_<=n;i_++) { vr[i_,ki] = temp[i_]; } } ii = blas.columnidxabsmax(vr, 1, n, ki); remax = 1/Math.Abs(vr[ii,ki]); for(i_=1; i_<=n;i_++) { vr[i_,ki] = remax*vr[i_,ki]; } } } else { // // Complex right eigenvector. // // Initial solve // [ (T(KI-1,KI-1) T(KI-1,KI) ) - (WR + I* WI)]*X = 0. // [ (T(KI,KI-1) T(KI,KI) ) ] // if( (double)(Math.Abs(t[ki-1,ki]))>=(double)(Math.Abs(t[ki,ki-1])) ) { work[ki-1+n] = 1; work[ki+n2] = wi/t[ki-1,ki]; } else { work[ki-1+n] = -(wi/t[ki,ki-1]); work[ki+n2] = 1; } work[ki+n] = 0; work[ki-1+n2] = 0; // // Form right-hand side // for(k=1; k<=ki-2; k++) { work[k+n] = -(work[ki-1+n]*t[k,ki-1]); work[k+n2] = -(work[ki+n2]*t[k,ki]); } // // Solve upper quasi-triangular system: // (T(1:KI-2,1:KI-2) - (WR+i*WI))*X = SCALE*(WORK+i*WORK2) // jnxt = ki-2; for(j=ki-2; j>=1; j--) { if( j>jnxt ) { continue; } j1 = j; j2 = j; jnxt = j-1; if( j>1 ) { if( (double)(t[j,j-1])!=(double)(0) ) { j1 = j-1; jnxt = j-2; } } if( j1==j2 ) { // // 1-by-1 diagonal block // temp11[1,1] = t[j,j]; temp12b[1,1] = work[j+n]; temp12b[1,2] = work[j+n+n]; internalhsevdlaln2(false, 1, 2, smin, 1.0, temp11, 1.0, 1.0, temp12b, wr, wi, ref rswap4, ref zswap4, ref ipivot44, ref civ4, ref crv4, ref x, ref scl, ref xnorm, ref ierr); // // Scale X(1,1) and X(1,2) to avoid overflow when // updating the right-hand side. 
// if( (double)(xnorm)>(double)(1) ) { if( (double)(work[j])>(double)(bignum/xnorm) ) { x[1,1] = x[1,1]/xnorm; x[1,2] = x[1,2]/xnorm; scl = scl/xnorm; } } // // Scale if necessary // if( (double)(scl)!=(double)(1) ) { k1 = 1+n; k2 = ki+n; for(i_=k1; i_<=k2;i_++) { work[i_] = scl*work[i_]; } k1 = 1+n2; k2 = ki+n2; for(i_=k1; i_<=k2;i_++) { work[i_] = scl*work[i_]; } } work[j+n] = x[1,1]; work[j+n2] = x[1,2]; // // Update the right-hand side // k1 = 1+n; k2 = j-1+n; k3 = 1; k4 = j-1; vt = -x[1,1]; i1_ = (k3) - (k1); for(i_=k1; i_<=k2;i_++) { work[i_] = work[i_] + vt*t[i_+i1_,j]; } k1 = 1+n2; k2 = j-1+n2; k3 = 1; k4 = j-1; vt = -x[1,2]; i1_ = (k3) - (k1); for(i_=k1; i_<=k2;i_++) { work[i_] = work[i_] + vt*t[i_+i1_,j]; } } else { // // 2-by-2 diagonal block // temp22[1,1] = t[j-1,j-1]; temp22[1,2] = t[j-1,j]; temp22[2,1] = t[j,j-1]; temp22[2,2] = t[j,j]; temp22b[1,1] = work[j-1+n]; temp22b[1,2] = work[j-1+n+n]; temp22b[2,1] = work[j+n]; temp22b[2,2] = work[j+n+n]; internalhsevdlaln2(false, 2, 2, smin, 1.0, temp22, 1.0, 1.0, temp22b, wr, wi, ref rswap4, ref zswap4, ref ipivot44, ref civ4, ref crv4, ref x, ref scl, ref xnorm, ref ierr); // // Scale X to avoid overflow when updating // the right-hand side. 
// if( (double)(xnorm)>(double)(1) ) { beta = Math.Max(work[j-1], work[j]); if( (double)(beta)>(double)(bignum/xnorm) ) { rec = 1/xnorm; x[1,1] = x[1,1]*rec; x[1,2] = x[1,2]*rec; x[2,1] = x[2,1]*rec; x[2,2] = x[2,2]*rec; scl = scl*rec; } } // // Scale if necessary // if( (double)(scl)!=(double)(1) ) { for(i_=1+n; i_<=ki+n;i_++) { work[i_] = scl*work[i_]; } for(i_=1+n2; i_<=ki+n2;i_++) { work[i_] = scl*work[i_]; } } work[j-1+n] = x[1,1]; work[j+n] = x[2,1]; work[j-1+n2] = x[1,2]; work[j+n2] = x[2,2]; // // Update the right-hand side // vt = -x[1,1]; i1_ = (1) - (n+1); for(i_=n+1; i_<=n+j-2;i_++) { work[i_] = work[i_] + vt*t[i_+i1_,j-1]; } vt = -x[2,1]; i1_ = (1) - (n+1); for(i_=n+1; i_<=n+j-2;i_++) { work[i_] = work[i_] + vt*t[i_+i1_,j]; } vt = -x[1,2]; i1_ = (1) - (n2+1); for(i_=n2+1; i_<=n2+j-2;i_++) { work[i_] = work[i_] + vt*t[i_+i1_,j-1]; } vt = -x[2,2]; i1_ = (1) - (n2+1); for(i_=n2+1; i_<=n2+j-2;i_++) { work[i_] = work[i_] + vt*t[i_+i1_,j]; } } } // // Copy the vector x or Q*x to VR and normalize. 
// if( !over ) { i1_ = (n+1) - (1); for(i_=1; i_<=ki;i_++) { vr[i_,iis-1] = work[i_+i1_]; } i1_ = (n2+1) - (1); for(i_=1; i_<=ki;i_++) { vr[i_,iis] = work[i_+i1_]; } emax = 0; for(k=1; k<=ki; k++) { emax = Math.Max(emax, Math.Abs(vr[k,iis-1])+Math.Abs(vr[k,iis])); } remax = 1/emax; for(i_=1; i_<=ki;i_++) { vr[i_,iis-1] = remax*vr[i_,iis-1]; } for(i_=1; i_<=ki;i_++) { vr[i_,iis] = remax*vr[i_,iis]; } for(k=ki+1; k<=n; k++) { vr[k,iis-1] = 0; vr[k,iis] = 0; } } else { if( ki>2 ) { for(i_=1; i_<=n;i_++) { temp[i_] = vr[i_,ki-1]; } blas.matrixvectormultiply(vr, 1, n, 1, ki-2, false, work, 1+n, ki-2+n, 1.0, ref temp, 1, n, work[ki-1+n]); for(i_=1; i_<=n;i_++) { vr[i_,ki-1] = temp[i_]; } for(i_=1; i_<=n;i_++) { temp[i_] = vr[i_,ki]; } blas.matrixvectormultiply(vr, 1, n, 1, ki-2, false, work, 1+n2, ki-2+n2, 1.0, ref temp, 1, n, work[ki+n2]); for(i_=1; i_<=n;i_++) { vr[i_,ki] = temp[i_]; } } else { vt = work[ki-1+n]; for(i_=1; i_<=n;i_++) { vr[i_,ki-1] = vt*vr[i_,ki-1]; } vt = work[ki+n2]; for(i_=1; i_<=n;i_++) { vr[i_,ki] = vt*vr[i_,ki]; } } emax = 0; for(k=1; k<=n; k++) { emax = Math.Max(emax, Math.Abs(vr[k,ki-1])+Math.Abs(vr[k,ki])); } remax = 1/emax; for(i_=1; i_<=n;i_++) { vr[i_,ki-1] = remax*vr[i_,ki-1]; } for(i_=1; i_<=n;i_++) { vr[i_,ki] = remax*vr[i_,ki]; } } } iis = iis-1; if( ip!=0 ) { iis = iis-1; } } if( ip==1 ) { ip = 0; } if( ip==-1 ) { ip = 1; } } } if( leftv ) { // // Compute left eigenvectors. // ip = 0; iis = 1; for(ki=1; ki<=n; ki++) { skipflag = false; if( ip==-1 ) { skipflag = true; } else { if( ki!=n ) { if( (double)(t[ki+1,ki])!=(double)(0) ) { ip = 1; } } if( somev ) { if( !vselect[ki] ) { skipflag = true; } } } if( !skipflag ) { // // Compute the KI-th eigenvalue (WR,WI). // wr = t[ki,ki]; wi = 0; if( ip!=0 ) { wi = Math.Sqrt(Math.Abs(t[ki,ki+1]))*Math.Sqrt(Math.Abs(t[ki+1,ki])); } smin = Math.Max(ulp*(Math.Abs(wr)+Math.Abs(wi)), smlnum); if( ip==0 ) { // // Real left eigenvector. 
// work[ki+n] = 1; // // Form right-hand side // for(k=ki+1; k<=n; k++) { work[k+n] = -t[ki,k]; } // // Solve the quasi-triangular system: // (T(KI+1:N,KI+1:N) - WR)'*X = SCALE*WORK // vmax = 1; vcrit = bignum; jnxt = ki+1; for(j=ki+1; j<=n; j++) { if( j(double)(vcrit) ) { rec = 1/vmax; for(i_=ki+n; i_<=n+n;i_++) { work[i_] = rec*work[i_]; } vmax = 1; vcrit = bignum; } i1_ = (ki+1+n)-(ki+1); vt = 0.0; for(i_=ki+1; i_<=j-1;i_++) { vt += t[i_,j]*work[i_+i1_]; } work[j+n] = work[j+n]-vt; // // Solve (T(J,J)-WR)'*X = WORK // temp11[1,1] = t[j,j]; temp11b[1,1] = work[j+n]; internalhsevdlaln2(false, 1, 1, smin, 1.0, temp11, 1.0, 1.0, temp11b, wr, 0, ref rswap4, ref zswap4, ref ipivot44, ref civ4, ref crv4, ref x, ref scl, ref xnorm, ref ierr); // // Scale if necessary // if( (double)(scl)!=(double)(1) ) { for(i_=ki+n; i_<=n+n;i_++) { work[i_] = scl*work[i_]; } } work[j+n] = x[1,1]; vmax = Math.Max(Math.Abs(work[j+n]), vmax); vcrit = bignum/vmax; } else { // // 2-by-2 diagonal block // // Scale if necessary to avoid overflow when forming // the right-hand side. 
// beta = Math.Max(work[j], work[j+1]); if( (double)(beta)>(double)(vcrit) ) { rec = 1/vmax; for(i_=ki+n; i_<=n+n;i_++) { work[i_] = rec*work[i_]; } vmax = 1; vcrit = bignum; } i1_ = (ki+1+n)-(ki+1); vt = 0.0; for(i_=ki+1; i_<=j-1;i_++) { vt += t[i_,j]*work[i_+i1_]; } work[j+n] = work[j+n]-vt; i1_ = (ki+1+n)-(ki+1); vt = 0.0; for(i_=ki+1; i_<=j-1;i_++) { vt += t[i_,j+1]*work[i_+i1_]; } work[j+1+n] = work[j+1+n]-vt; // // Solve // [T(J,J)-WR T(J,J+1) ]'* X = SCALE*( WORK1 ) // [T(J+1,J) T(J+1,J+1)-WR] ( WORK2 ) // temp22[1,1] = t[j,j]; temp22[1,2] = t[j,j+1]; temp22[2,1] = t[j+1,j]; temp22[2,2] = t[j+1,j+1]; temp21b[1,1] = work[j+n]; temp21b[2,1] = work[j+1+n]; internalhsevdlaln2(true, 2, 1, smin, 1.0, temp22, 1.0, 1.0, temp21b, wr, 0, ref rswap4, ref zswap4, ref ipivot44, ref civ4, ref crv4, ref x, ref scl, ref xnorm, ref ierr); // // Scale if necessary // if( (double)(scl)!=(double)(1) ) { for(i_=ki+n; i_<=n+n;i_++) { work[i_] = scl*work[i_]; } } work[j+n] = x[1,1]; work[j+1+n] = x[2,1]; vmax = Math.Max(Math.Abs(work[j+n]), Math.Max(Math.Abs(work[j+1+n]), vmax)); vcrit = bignum/vmax; } } // // Copy the vector x or Q*x to VL and normalize. 
// if( !over ) { i1_ = (ki+n) - (ki); for(i_=ki; i_<=n;i_++) { vl[i_,iis] = work[i_+i1_]; } ii = blas.columnidxabsmax(vl, ki, n, iis); remax = 1/Math.Abs(vl[ii,iis]); for(i_=ki; i_<=n;i_++) { vl[i_,iis] = remax*vl[i_,iis]; } for(k=1; k<=ki-1; k++) { vl[k,iis] = 0; } } else { if( ki=(double)(Math.Abs(t[ki+1,ki])) ) { work[ki+n] = wi/t[ki,ki+1]; work[ki+1+n2] = 1; } else { work[ki+n] = 1; work[ki+1+n2] = -(wi/t[ki+1,ki]); } work[ki+1+n] = 0; work[ki+n2] = 0; // // Form right-hand side // for(k=ki+2; k<=n; k++) { work[k+n] = -(work[ki+n]*t[ki,k]); work[k+n2] = -(work[ki+1+n2]*t[ki+1,k]); } // // Solve complex quasi-triangular system: // ( T(KI+2,N:KI+2,N) - (WR-i*WI) )*X = WORK1+i*WORK2 // vmax = 1; vcrit = bignum; jnxt = ki+2; for(j=ki+2; j<=n; j++) { if( j(double)(vcrit) ) { rec = 1/vmax; for(i_=ki+n; i_<=n+n;i_++) { work[i_] = rec*work[i_]; } for(i_=ki+n2; i_<=n+n2;i_++) { work[i_] = rec*work[i_]; } vmax = 1; vcrit = bignum; } i1_ = (ki+2+n)-(ki+2); vt = 0.0; for(i_=ki+2; i_<=j-1;i_++) { vt += t[i_,j]*work[i_+i1_]; } work[j+n] = work[j+n]-vt; i1_ = (ki+2+n2)-(ki+2); vt = 0.0; for(i_=ki+2; i_<=j-1;i_++) { vt += t[i_,j]*work[i_+i1_]; } work[j+n2] = work[j+n2]-vt; // // Solve (T(J,J)-(WR-i*WI))*(X11+i*X12)= WK+I*WK2 // temp11[1,1] = t[j,j]; temp12b[1,1] = work[j+n]; temp12b[1,2] = work[j+n+n]; internalhsevdlaln2(false, 1, 2, smin, 1.0, temp11, 1.0, 1.0, temp12b, wr, -wi, ref rswap4, ref zswap4, ref ipivot44, ref civ4, ref crv4, ref x, ref scl, ref xnorm, ref ierr); // // Scale if necessary // if( (double)(scl)!=(double)(1) ) { for(i_=ki+n; i_<=n+n;i_++) { work[i_] = scl*work[i_]; } for(i_=ki+n2; i_<=n+n2;i_++) { work[i_] = scl*work[i_]; } } work[j+n] = x[1,1]; work[j+n2] = x[1,2]; vmax = Math.Max(Math.Abs(work[j+n]), Math.Max(Math.Abs(work[j+n2]), vmax)); vcrit = bignum/vmax; } else { // // 2-by-2 diagonal block // // Scale if necessary to avoid overflow when forming // the right-hand side elements. 
// beta = Math.Max(work[j], work[j+1]); if( (double)(beta)>(double)(vcrit) ) { rec = 1/vmax; for(i_=ki+n; i_<=n+n;i_++) { work[i_] = rec*work[i_]; } for(i_=ki+n2; i_<=n+n2;i_++) { work[i_] = rec*work[i_]; } vmax = 1; vcrit = bignum; } i1_ = (ki+2+n)-(ki+2); vt = 0.0; for(i_=ki+2; i_<=j-1;i_++) { vt += t[i_,j]*work[i_+i1_]; } work[j+n] = work[j+n]-vt; i1_ = (ki+2+n2)-(ki+2); vt = 0.0; for(i_=ki+2; i_<=j-1;i_++) { vt += t[i_,j]*work[i_+i1_]; } work[j+n2] = work[j+n2]-vt; i1_ = (ki+2+n)-(ki+2); vt = 0.0; for(i_=ki+2; i_<=j-1;i_++) { vt += t[i_,j+1]*work[i_+i1_]; } work[j+1+n] = work[j+1+n]-vt; i1_ = (ki+2+n2)-(ki+2); vt = 0.0; for(i_=ki+2; i_<=j-1;i_++) { vt += t[i_,j+1]*work[i_+i1_]; } work[j+1+n2] = work[j+1+n2]-vt; // // Solve 2-by-2 complex linear equation // ([T(j,j) T(j,j+1) ]'-(wr-i*wi)*I)*X = SCALE*B // ([T(j+1,j) T(j+1,j+1)] ) // temp22[1,1] = t[j,j]; temp22[1,2] = t[j,j+1]; temp22[2,1] = t[j+1,j]; temp22[2,2] = t[j+1,j+1]; temp22b[1,1] = work[j+n]; temp22b[1,2] = work[j+n+n]; temp22b[2,1] = work[j+1+n]; temp22b[2,2] = work[j+1+n+n]; internalhsevdlaln2(true, 2, 2, smin, 1.0, temp22, 1.0, 1.0, temp22b, wr, -wi, ref rswap4, ref zswap4, ref ipivot44, ref civ4, ref crv4, ref x, ref scl, ref xnorm, ref ierr); // // Scale if necessary // if( (double)(scl)!=(double)(1) ) { for(i_=ki+n; i_<=n+n;i_++) { work[i_] = scl*work[i_]; } for(i_=ki+n2; i_<=n+n2;i_++) { work[i_] = scl*work[i_]; } } work[j+n] = x[1,1]; work[j+n2] = x[1,2]; work[j+1+n] = x[2,1]; work[j+1+n2] = x[2,2]; vmax = Math.Max(Math.Abs(x[1,1]), vmax); vmax = Math.Max(Math.Abs(x[1,2]), vmax); vmax = Math.Max(Math.Abs(x[2,1]), vmax); vmax = Math.Max(Math.Abs(x[2,2]), vmax); vcrit = bignum/vmax; } } // // Copy the vector x or Q*x to VL and normalize. 
// if( !over ) { i1_ = (ki+n) - (ki); for(i_=ki; i_<=n;i_++) { vl[i_,iis] = work[i_+i1_]; } i1_ = (ki+n2) - (ki); for(i_=ki; i_<=n;i_++) { vl[i_,iis+1] = work[i_+i1_]; } emax = 0; for(k=ki; k<=n; k++) { emax = Math.Max(emax, Math.Abs(vl[k,iis])+Math.Abs(vl[k,iis+1])); } remax = 1/emax; for(i_=ki; i_<=n;i_++) { vl[i_,iis] = remax*vl[i_,iis]; } for(i_=ki; i_<=n;i_++) { vl[i_,iis+1] = remax*vl[i_,iis+1]; } for(k=1; k<=ki-1; k++) { vl[k,iis] = 0; vl[k,iis+1] = 0; } } else { if( ki(double)(1) ) { if( (double)(bnorm)>(double)(bignum*cnorm) ) { scl = 1/bnorm; } } // // Compute X // x[1,1] = b[1,1]*scl/csr; xnorm = Math.Abs(x[1,1]); } else { // // Complex 1x1 system (w is complex) // // C = ca A - w D // csr = ca*a[1,1]-wr*d1; csi = -(wi*d1); cnorm = Math.Abs(csr)+Math.Abs(csi); // // If | C | < SMINI, use C = SMINI // if( (double)(cnorm)<(double)(smini) ) { csr = smini; csi = 0; cnorm = smini; info = 1; } // // Check scaling for X = B / C // bnorm = Math.Abs(b[1,1])+Math.Abs(b[1,2]); if( (double)(cnorm)<(double)(1) && (double)(bnorm)>(double)(1) ) { if( (double)(bnorm)>(double)(bignum*cnorm) ) { scl = 1/bnorm; } } // // Compute X // internalhsevdladiv(scl*b[1,1], scl*b[1,2], csr, csi, ref tmp1, ref tmp2); x[1,1] = tmp1; x[1,2] = tmp2; xnorm = Math.Abs(x[1,1])+Math.Abs(x[1,2]); } } else { // // 2x2 System // // Compute the real part of C = ca A - w D (or ca A' - w D ) // crv4[1+0] = ca*a[1,1]-wr*d1; crv4[2+2] = ca*a[2,2]-wr*d2; if( ltrans ) { crv4[1+2] = ca*a[2,1]; crv4[2+0] = ca*a[1,2]; } else { crv4[2+0] = ca*a[2,1]; crv4[1+2] = ca*a[1,2]; } if( nw==1 ) { // // Real 2x2 system (w is real) // // Find the largest element in C // cmax = 0; icmax = 0; for(j=1; j<=4; j++) { if( (double)(Math.Abs(crv4[j]))>(double)(cmax) ) { cmax = Math.Abs(crv4[j]); icmax = j; } } // // If norm(C) < SMINI, use SMINI*identity. 
// if( (double)(cmax)<(double)(smini) ) { bnorm = Math.Max(Math.Abs(b[1,1]), Math.Abs(b[2,1])); if( (double)(smini)<(double)(1) && (double)(bnorm)>(double)(1) ) { if( (double)(bnorm)>(double)(bignum*smini) ) { scl = 1/bnorm; } } temp = scl/smini; x[1,1] = temp*b[1,1]; x[2,1] = temp*b[2,1]; xnorm = temp*bnorm; info = 1; return; } // // Gaussian elimination with complete pivoting. // ur11 = crv4[icmax]; cr21 = crv4[ipivot44[2,icmax]]; ur12 = crv4[ipivot44[3,icmax]]; cr22 = crv4[ipivot44[4,icmax]]; ur11r = 1/ur11; lr21 = ur11r*cr21; ur22 = cr22-ur12*lr21; // // If smaller pivot < SMINI, use SMINI // if( (double)(Math.Abs(ur22))<(double)(smini) ) { ur22 = smini; info = 1; } if( rswap4[icmax] ) { br1 = b[2,1]; br2 = b[1,1]; } else { br1 = b[1,1]; br2 = b[2,1]; } br2 = br2-lr21*br1; bbnd = Math.Max(Math.Abs(br1*(ur22*ur11r)), Math.Abs(br2)); if( (double)(bbnd)>(double)(1) && (double)(Math.Abs(ur22))<(double)(1) ) { if( (double)(bbnd)>=(double)(bignum*Math.Abs(ur22)) ) { scl = 1/bbnd; } } xr2 = br2*scl/ur22; xr1 = scl*br1*ur11r-xr2*(ur11r*ur12); if( zswap4[icmax] ) { x[1,1] = xr2; x[2,1] = xr1; } else { x[1,1] = xr1; x[2,1] = xr2; } xnorm = Math.Max(Math.Abs(xr1), Math.Abs(xr2)); // // Further scaling if norm(A) norm(X) > overflow // if( (double)(xnorm)>(double)(1) && (double)(cmax)>(double)(1) ) { if( (double)(xnorm)>(double)(bignum/cmax) ) { temp = cmax/bignum; x[1,1] = temp*x[1,1]; x[2,1] = temp*x[2,1]; xnorm = temp*xnorm; scl = temp*scl; } } } else { // // Complex 2x2 system (w is complex) // // Find the largest element in C // civ4[1+0] = -(wi*d1); civ4[2+0] = 0; civ4[1+2] = 0; civ4[2+2] = -(wi*d2); cmax = 0; icmax = 0; for(j=1; j<=4; j++) { if( (double)(Math.Abs(crv4[j])+Math.Abs(civ4[j]))>(double)(cmax) ) { cmax = Math.Abs(crv4[j])+Math.Abs(civ4[j]); icmax = j; } } // // If norm(C) < SMINI, use SMINI*identity. 
// if( (double)(cmax)<(double)(smini) ) { bnorm = Math.Max(Math.Abs(b[1,1])+Math.Abs(b[1,2]), Math.Abs(b[2,1])+Math.Abs(b[2,2])); if( (double)(smini)<(double)(1) && (double)(bnorm)>(double)(1) ) { if( (double)(bnorm)>(double)(bignum*smini) ) { scl = 1/bnorm; } } temp = scl/smini; x[1,1] = temp*b[1,1]; x[2,1] = temp*b[2,1]; x[1,2] = temp*b[1,2]; x[2,2] = temp*b[2,2]; xnorm = temp*bnorm; info = 1; return; } // // Gaussian elimination with complete pivoting. // ur11 = crv4[icmax]; ui11 = civ4[icmax]; cr21 = crv4[ipivot44[2,icmax]]; ci21 = civ4[ipivot44[2,icmax]]; ur12 = crv4[ipivot44[3,icmax]]; ui12 = civ4[ipivot44[3,icmax]]; cr22 = crv4[ipivot44[4,icmax]]; ci22 = civ4[ipivot44[4,icmax]]; if( icmax==1 || icmax==4 ) { // // Code when off-diagonals of pivoted C are real // if( (double)(Math.Abs(ur11))>(double)(Math.Abs(ui11)) ) { temp = ui11/ur11; ur11r = 1/(ur11*(1+math.sqr(temp))); ui11r = -(temp*ur11r); } else { temp = ur11/ui11; ui11r = -(1/(ui11*(1+math.sqr(temp)))); ur11r = -(temp*ui11r); } lr21 = cr21*ur11r; li21 = cr21*ui11r; ur12s = ur12*ur11r; ui12s = ur12*ui11r; ur22 = cr22-ur12*lr21; ui22 = ci22-ur12*li21; } else { // // Code when diagonals of pivoted C are real // ur11r = 1/ur11; ui11r = 0; lr21 = cr21*ur11r; li21 = ci21*ur11r; ur12s = ur12*ur11r; ui12s = ui12*ur11r; ur22 = cr22-ur12*lr21+ui12*li21; ui22 = -(ur12*li21)-ui12*lr21; } u22abs = Math.Abs(ur22)+Math.Abs(ui22); // // If smaller pivot < SMINI, use SMINI // if( (double)(u22abs)<(double)(smini) ) { ur22 = smini; ui22 = 0; info = 1; } if( rswap4[icmax] ) { br2 = b[1,1]; br1 = b[2,1]; bi2 = b[1,2]; bi1 = b[2,2]; } else { br1 = b[1,1]; br2 = b[2,1]; bi1 = b[1,2]; bi2 = b[2,2]; } br2 = br2-lr21*br1+li21*bi1; bi2 = bi2-li21*br1-lr21*bi1; bbnd = Math.Max((Math.Abs(br1)+Math.Abs(bi1))*(u22abs*(Math.Abs(ur11r)+Math.Abs(ui11r))), Math.Abs(br2)+Math.Abs(bi2)); if( (double)(bbnd)>(double)(1) && (double)(u22abs)<(double)(1) ) { if( (double)(bbnd)>=(double)(bignum*u22abs) ) { scl = 1/bbnd; br1 = scl*br1; bi1 = 
scl*bi1;
                    br2 = scl*br2;
                    bi2 = scl*bi2;
                }
            }
            internalhsevdladiv(br2, bi2, ur22, ui22, ref xr2, ref xi2);
            xr1 = ur11r*br1-ui11r*bi1-ur12s*xr2+ui12s*xi2;
            xi1 = ui11r*br1+ur11r*bi1-ui12s*xr2-ur12s*xi2;
            if( zswap4[icmax] )
            {
                x[1,1] = xr2;
                x[2,1] = xr1;
                x[1,2] = xi2;
                x[2,2] = xi1;
            }
            else
            {
                x[1,1] = xr1;
                x[2,1] = xr2;
                x[1,2] = xi1;
                x[2,2] = xi2;
            }
            xnorm = Math.Max(Math.Abs(xr1)+Math.Abs(xi1), Math.Abs(xr2)+Math.Abs(xi2));

            //
            // Further scaling if norm(A) norm(X) > overflow
            //
            if( (double)(xnorm)>(double)(1) && (double)(cmax)>(double)(1) )
            {
                if( (double)(xnorm)>(double)(bignum/cmax) )
                {
                    temp = cmax/bignum;
                    x[1,1] = temp*x[1,1];
                    x[2,1] = temp*x[2,1];
                    x[1,2] = temp*x[1,2];
                    x[2,2] = temp*x[2,2];
                    xnorm = temp*xnorm;
                    scl = temp*scl;
                }
            }
        }
    }
}


/*************************************************************************
Complex division carried out in real arithmetic:

               a + i*b
    p + i*q = ---------
               c + i*d

The quotient is formed from the ratio of the smaller-magnitude denominator
component to the larger one (d/c when |d|<|c|, c/d otherwise), following
the algorithm due to Robert L. Smith; see D. Knuth, The Art of Computer
Programming, Vol.2, p.195.

INPUT PARAMETERS:
    A, B    -   real and imaginary parts of the numerator
    C, D    -   real and imaginary parts of the denominator

OUTPUT PARAMETERS:
    P, Q    -   real and imaginary parts of the quotient

NOTE: a zero denominator (C=D=0) is not treated specially here.

  -- LAPACK auxiliary routine (version 3.0) --
     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
     Courant Institute, Argonne National Lab, and Rice University
     October 31, 1992
*************************************************************************/
private static void internalhsevdladiv(double a, double b, double c, double d, ref double p, ref double q)
{
    double ratio = 0;
    double denom = 0;

    p = 0;
    q = 0;
    if( (double)(Math.Abs(d))<(double)(Math.Abs(c)) )
    {
        // |c| dominates: scale by d/c
        ratio = d/c;
        denom = c+d*ratio;
        p = (a+b*ratio)/denom;
        q = (b-a*ratio)/denom;
        return;
    }

    // |d| dominates (or the magnitudes are not ordered): scale by c/d
    ratio = c/d;
    denom = d+c*ratio;
    p = (b+a*ratio)/denom;
    q = (-a+b*ratio)/denom;
}
}
public class matgen
{
/*************************************************************************
Generation of a random uniformly distributed (Haar) orthogonal matrix

INPUT PARAMETERS:
    N   -   matrix size, N>=1

OUTPUT PARAMETERS:
    A   -   orthogonal NxN matrix, array[0..N-1,0..N-1]

NOTE: this function uses algorithm described in Stewart, G. W.
(1980), "The Efficient Generation of Random Orthogonal Matrices with an
      Application to Condition Estimators".

      In short, to generate an (N+1)x(N+1) orthogonal matrix, it:
      * takes an NxN one
      * takes a uniformly distributed unit vector of dimension N+1
      * constructs a Householder reflection from the vector, then applies
        it to the smaller matrix (embedded in the larger size with a 1 at
        the bottom right corner).

  -- ALGLIB routine --
     04.12.2009
     Bochkanov Sergey
*************************************************************************/
public static void rmatrixrndorthogonal(int n, ref double[,] a)
{
    // A is reset to an empty array before validation, so it is 0x0 if the
    // assertion fires.
    a = new double[0,0];
    alglib.ap.assert(n>=1, "RMatrixRndOrthogonal: N<1!");

    // Build the NxN identity: a freshly allocated array is zero-filled, so
    // only the diagonal has to be written.
    a = new double[n, n];
    for(int k=0; k<n; k++)
    {
        a[k,k] = 1;
    }

    // Identity times a random orthogonal factor is that factor itself.
    rmatrixrndorthogonalfromtheright(ref a, n, n);
}


/*************************************************************************
Generation of random NxN matrix with given condition number and norm2(A)=1

The routine fills a diagonal matrix whose entries are exp() of values
between 0 and log(1/C) - the first entry is exactly exp(0), the last one
exactly exp(log(1/C)), the interior ones are drawn at random - and then
multiplies it from the left and from the right by random orthogonal
matrices.

INPUT PARAMETERS:
    N   -   matrix size
    C   -   condition number (in 2-norm)

OUTPUT PARAMETERS:
    A   -   random matrix with norm2(A)=1 and cond(A)=C

  -- ALGLIB routine --
     04.12.2009
     Bochkanov Sergey
*************************************************************************/
public static void rmatrixrndcond(int n, double c, ref double[,] a)
{
    hqrnd.hqrndstate rs = new hqrnd.hqrndstate();

    a = new double[0,0];
    alglib.ap.assert(n>=1 && (double)(c)>=(double)(1), "RMatrixRndCond: N<1 or C<1!");
    a = new double[n, n];

    if( n==1 )
    {
        //
        // special case: single entry taken as 2*randominteger(2)-1
        // (presumably +1 or -1)
        //
        a[0,0] = 2*math.randominteger(2)-1;
        return;
    }

    hqrnd.hqrndrandomize(rs);
    double logtop = 0;
    double logbottom = Math.Log(1/c);

    // Off-diagonal entries stay at the zero they were allocated with.
    a[0,0] = Math.Exp(logtop);
    for(int k=1; k<=n-2; k++)
    {
        a[k,k] = Math.Exp(hqrnd.hqrnduniformr(rs)*(logbottom-logtop)+logtop);
    }
    a[n-1,n-1] = Math.Exp(logbottom);

    rmatrixrndorthogonalfromtheleft(ref a, n, n);
    rmatrixrndorthogonalfromtheright(ref a, n, n);
}
/*************************************************************************
Generation of a random Haar distributed orthogonal complex matrix

INPUT PARAMETERS:
    N   -   matrix size, N>=1

OUTPUT PARAMETERS:
    A   -   orthogonal NxN matrix, array[0..N-1,0..N-1]

NOTE: this function uses algorithm described in Stewart, G. W. (1980),
      "The Efficient Generation of Random Orthogonal Matrices with an
      Application to Condition Estimators".

      In short, to generate an (N+1)x(N+1) orthogonal matrix, it:
      * takes an NxN one
      * takes a uniformly distributed unit vector of dimension N+1
      * constructs a Householder reflection from the vector, then applies
        it to the smaller matrix (embedded in the larger size with a 1 at
        the bottom right corner).

  -- ALGLIB routine --
     04.12.2009
     Bochkanov Sergey
*************************************************************************/
public static void cmatrixrndorthogonal(int n, ref complex[,] a)
{
    // A is reset to an empty array before validation, so it is 0x0 if the
    // assertion fires.
    a = new complex[0,0];
    alglib.ap.assert(n>=1, "CMatrixRndOrthogonal: N<1!");

    // Fill A with the NxN identity, then multiply it by a random factor.
    a = new complex[n, n];
    for(int row=0; row<n; row++)
    {
        for(int col=0; col<n; col++)
        {
            a[row,col] = row==col ? 1 : 0;
        }
    }
    cmatrixrndorthogonalfromtheright(ref a, n, n);
}


/*************************************************************************
Generation of random NxN complex matrix with given condition number C and
norm2(A)=1

A diagonal matrix with entries exp(t), t between 0 and log(1/C), is built
(first entry exp(0), last entry exp(log(1/C)), interior entries random)
and multiplied from both sides by random complex orthogonal matrices.

INPUT PARAMETERS:
    N   -   matrix size
    C   -   condition number (in 2-norm)

OUTPUT PARAMETERS:
    A   -   random matrix with norm2(A)=1 and cond(A)=C

  -- ALGLIB routine --
     04.12.2009
     Bochkanov Sergey
*************************************************************************/
public static void cmatrixrndcond(int n, double c, ref complex[,] a)
{
    hqrnd.hqrndstate state = new hqrnd.hqrndstate();
    complex v = 0;

    a = new complex[0,0];
    alglib.ap.assert(n>=1 && (double)(c)>=(double)(1), "CMatrixRndCond: N<1 or C<1!");
    a = new complex[n, n];

    // Both the 1x1 and the general path start by seeding the generator.
    hqrnd.hqrndrandomize(state);

    if( n==1 )
    {
        //
        // special case: the single entry is the pair produced by hqrndunit2
        //
        hqrnd.hqrndunit2(state, ref v.x, ref v.y);
        a[0,0] = v;
        return;
    }

    double logtop = 0;
    double logbottom = Math.Log(1/c);
    for(int row=0; row<=n-1; row++)
    {
        for(int col=0; col<=n-1; col++)
        {
            a[row,col] = 0;
        }
    }
    a[0,0] = Math.Exp(logtop);
    for(int k=1; k<=n-2; k++)
    {
        a[k,k] = Math.Exp(hqrnd.hqrnduniformr(state)*(logbottom-logtop)+logtop);
    }
    a[n-1,n-1] = Math.Exp(logbottom);

    cmatrixrndorthogonalfromtheleft(ref a, n, n);
    cmatrixrndorthogonalfromtheright(ref a, n, n);
}


/*************************************************************************
Generation of random NxN symmetric matrix with given condition number and
norm2(A)=1

A diagonal matrix is built first: the first entry is exp(0), the last one
exp(log(1/C)), and every interior entry is exp() of a random value between
those exponents multiplied by 2*hqrnduniformi(rs,2)-1 (presumably a random
sign). The result is then passed to smatrixrndmultiply.

INPUT PARAMETERS:
    N   -   matrix size
    C   -   condition number (in 2-norm)

OUTPUT PARAMETERS:
    A   -   random matrix with norm2(A)=1 and cond(A)=C

  -- ALGLIB routine --
     04.12.2009
     Bochkanov Sergey
*************************************************************************/
public static void smatrixrndcond(int n, double c, ref double[,] a)
{
    hqrnd.hqrndstate rs = new hqrnd.hqrndstate();

    a = new double[0,0];
    alglib.ap.assert(n>=1 && (double)(c)>=(double)(1), "SMatrixRndCond: N<1 or C<1!");
    a = new double[n, n];

    if( n==1 )
    {
        //
        // special case
        //
        a[0,0] = 2*math.randominteger(2)-1;
        return;
    }

    //
    // Prepare matrix (off-diagonal entries keep their allocation-time zero)
    //
    hqrnd.hqrndrandomize(rs);
    double logtop = 0;
    double logbottom = Math.Log(1/c);
    a[0,0] = Math.Exp(logtop);
    for(int k=1; k<=n-2; k++)
    {
        // The integer draw precedes the real draw, as in a single
        // left-to-right expression.
        int sign = 2*hqrnd.hqrnduniformi(rs, 2)-1;
        double magnitude = Math.Exp(hqrnd.hqrnduniformr(rs)*(logbottom-logtop)+logtop);
        a[k,k] = sign*magnitude;
    }
    a[n-1,n-1] = Math.Exp(logbottom);

    //
    // Multiply
    //
    smatrixrndmultiply(ref a, n);
}


/*************************************************************************
Generation of random NxN symmetric positive definite matrix with given
condition number and norm2(A)=1

INPUT PARAMETERS:
    N   -   matrix size
    C   -   condition number (in 2-norm)

OUTPUT PARAMETERS:
    A   -   random SPD matrix with norm2(A)=1 and cond(A)=C

  -- ALGLIB routine --
     04.12.2009
     Bochkanov Sergey
*************************************************************************/
public static void spdmatrixrndcond(int n, double c, ref double[,] a)
{
    hqrnd.hqrndstate rs = new hqrnd.hqrndstate();

    a = new double[0,0];

    //
    // Special cases: for N<=0 or C<1 nothing is generated and A stays an
    // empty 0x0 array (no assertion is raised here).
    //
    if( n<=0 || (double)(c)<(double)(1) )
    {
        return;
    }
    a = new double[n, n];
    if( n==1 )
    {
        a[0,0] = 1;
        return;
    }

    //
    // Prepare matrix: diagonal entries are exp() of values between 0 and
    // log(1/C); off-diagonal entries keep their allocation-time zero.
    //
    hqrnd.hqrndrandomize(rs);
    double logtop = 0;
    double logbottom = Math.Log(1/c);
    a[0,0] = Math.Exp(logtop);
    for(int k=1; k<=n-2; k++)
    {
        a[k,k] = Math.Exp(hqrnd.hqrnduniformr(rs)*(logbottom-logtop)+logtop);
    }
    a[n-1,n-1] = Math.Exp(logbottom);

    //
    // Multiply
    //
    smatrixrndmultiply(ref a, n);
}


/*************************************************************************
Generation of random NxN Hermitian matrix with given condition number and
norm2(A)=1

A real diagonal matrix is built first (first entry exp(0), last entry
exp(log(1/C)), interior entries exp() of a random value between those
exponents multiplied by 2*hqrnduniformi(rs,2)-1), then passed to
hmatrixrndmultiply. Finally the imaginary parts of the diagonal are set to
zero.

INPUT PARAMETERS:
    N   -   matrix size
    C   -   condition number (in 2-norm)

OUTPUT PARAMETERS:
    A   -   random matrix with norm2(A)=1 and cond(A)=C

  -- ALGLIB routine --
     04.12.2009
     Bochkanov Sergey
*************************************************************************/
public static void hmatrixrndcond(int n, double c, ref complex[,] a)
{
    hqrnd.hqrndstate rs = new hqrnd.hqrndstate();

    a = new complex[0,0];
    alglib.ap.assert(n>=1 && (double)(c)>=(double)(1), "HMatrixRndCond: N<1 or C<1!");
    a = new complex[n, n];

    if( n==1 )
    {
        //
        // special case
        //
        a[0,0] = 2*math.randominteger(2)-1;
        return;
    }

    //
    // Prepare matrix
    //
    hqrnd.hqrndrandomize(rs);
    double logtop = 0;
    double logbottom = Math.Log(1/c);
    for(int row=0; row<=n-1; row++)
    {
        for(int col=0; col<=n-1; col++)
        {
            a[row,col] = 0;
        }
    }
    a[0,0] = Math.Exp(logtop);
    for(int k=1; k<=n-2; k++)
    {
        // The integer draw precedes the real draw, as in a single
        // left-to-right expression.
        int sign = 2*hqrnd.hqrnduniformi(rs, 2)-1;
        double magnitude = Math.Exp(hqrnd.hqrnduniformr(rs)*(logbottom-logtop)+logtop);
        a[k,k] = sign*magnitude;
    }
    a[n-1,n-1] = Math.Exp(logbottom);

    //
    // Multiply
    //
    hmatrixrndmultiply(ref a, n);

    //
    // post-process to ensure that matrix diagonal is real
    //
    for(int k=0; k<=n-1; k++)
    {
        a[k,k].y = 0;
    }
}
/*************************************************************************
Generation of random NxN Hermitian positive definite matrix with given
condition number and norm2(A)=1

INPUT PARAMETERS:
    N   -   matrix size
    C   -   condition number (in 2-norm)

OUTPUT PARAMETERS:
    A   -   random HPD matrix with norm2(A)=1 and cond(A)=C.
            When N<=0 or C<1 nothing is generated and A is returned as an
            empty 0x0 array (no assertion is raised).

  -- ALGLIB routine --
     04.12.2009
     Bochkanov Sergey
*************************************************************************/
public static void hpdmatrixrndcond(int n, double c, ref complex[,] a)
{
    int i = 0;
    int j = 0;
    double l1 = 0;
    double l2 = 0;
    hqrnd.hqrndstate rs = new hqrnd.hqrndstate();

    a = new complex[0,0];

    //
    // Special cases
    //
    if( n<=0 || (double)(c)<(double)(1) )
    {
        return;
    }
    a = new complex[n, n];
    if( n==1 )
    {
        a[0,0] = 1;
        return;
    }

    //
    // Prepare matrix: real diagonal with entries exp(t), t between
    // l1=0 and l2=log(1/C)
    //
    hqrnd.hqrndrandomize(rs);
    l1 = 0;
    l2 = Math.Log(1/c);
    for(i=0; i<=n-1; i++)
    {
        for(j=0; j<=n-1; j++)
        {
            a[i,j] = 0;
        }
    }
    a[0,0] = Math.Exp(l1);
    for(i=1; i<=n-2; i++)
    {
        a[i,i] = Math.Exp(hqrnd.hqrnduniformr(rs)*(l2-l1)+l1);
    }
    a[n-1,n-1] = Math.Exp(l2);

    //
    // Multiply
    //
    hmatrixrndmultiply(ref a, n);

    //
    // post-process to ensure that matrix diagonal is real
    //
    for(i=0; i<=n-1; i++)
    {
        a[i,i].y = 0;
    }
}


/*************************************************************************
Multiplication of MxN matrix by NxN random Haar distributed orthogonal matrix

INPUT PARAMETERS:
    A   -   matrix, array[0..M-1, 0..N-1]
    M, N-   matrix size

OUTPUT PARAMETERS:
    A   -   A*Q, where Q is random NxN orthogonal matrix

  -- ALGLIB routine --
     04.12.2009
     Bochkanov Sergey
*************************************************************************/
public static void rmatrixrndorthogonalfromtheright(ref double[,] a, int m, int n)
{
    double tau = 0;
    double lambdav = 0;
    int s = 0;
    int i = 0;
    double u1 = 0;
    double u2 = 0;
    double[] w = new double[0];
    double[] v = new double[0];
    hqrnd.hqrndstate state = new hqrnd.hqrndstate();
    int i_ = 0;

    alglib.ap.assert(n>=1 && m>=1, "RMatrixRndOrthogonalFromTheRight: N<1 or M<1!");
    if( n==1 )
    {
        //
        // Special case: scale the single column by 2*randominteger(2)-1
        //
        tau = 2*math.randominteger(2)-1;
        for(i=0; i<=m-1; i++)
        {
            a[i,0] = a[i,0]*tau;
        }
        return;
    }

    //
    // General case.
    // First pass: apply reflections of growing size S=2..N to the trailing
    // S columns of A.
    //
    w = new double[m];
    v = new double[n+1];
    hqrnd.hqrndrandomize(state);
    for(s=2; s<=n; s++)
    {
        //
        // Prepare random normal v (1-based, length S); redraw if it is
        // exactly zero. Normal deviates are produced in pairs.
        //
        do
        {
            i = 1;
            while( i<=s )
            {
                hqrnd.hqrndnormal2(state, ref u1, ref u2);
                v[i] = u1;
                if( i+1<=s )
                {
                    v[i+1] = u2;
                }
                i = i+2;
            }
            lambdav = 0.0;
            for(i_=1; i_<=s;i_++)
            {
                lambdav += v[i_]*v[i_];
            }
        }
        while( (double)(lambdav)==(double)(0) );

        //
        // Prepare and apply reflection
        //
        reflections.generatereflection(ref v, s, ref tau);
        v[1] = 1;
        reflections.applyreflectionfromtheright(ref a, tau, v, 0, m-1, n-s, n-1, ref w);
    }

    //
    // Second pass: scale every column by 2*hqrnduniformi(state,2)-1.
    //
    for(i=0; i<=n-1; i++)
    {
        tau = 2*hqrnd.hqrnduniformi(state, 2)-1;
        for(i_=0; i_<=m-1;i_++)
        {
            a[i_,i] = tau*a[i_,i];
        }
    }
}


/*************************************************************************
Multiplication of MxN matrix by MxM random Haar distributed orthogonal matrix

INPUT PARAMETERS:
    A   -   matrix, array[0..M-1, 0..N-1]
    M, N-   matrix size

OUTPUT PARAMETERS:
    A   -   Q*A, where Q is random MxM orthogonal matrix

  -- ALGLIB routine --
     04.12.2009
     Bochkanov Sergey
*************************************************************************/
public static void rmatrixrndorthogonalfromtheleft(ref double[,] a, int m, int n)
{
    double tau = 0;
    double lambdav = 0;
    int s = 0;
    int i = 0;
    int j = 0;
    double u1 = 0;
    double u2 = 0;
    double[] w = new double[0];
    double[] v = new double[0];
    hqrnd.hqrndstate state = new hqrnd.hqrndstate();
    int i_ = 0;

    // The message names this routine (it used to carry the name of the
    // "FromTheRight" sibling, which misreported the failing call).
    alglib.ap.assert(n>=1 && m>=1, "RMatrixRndOrthogonalFromTheLeft: N<1 or M<1!");
    if( m==1 )
    {
        //
        // special case: scale the single row by 2*randominteger(2)-1
        //
        tau = 2*math.randominteger(2)-1;
        for(j=0; j<=n-1; j++)
        {
            a[0,j] = a[0,j]*tau;
        }
        return;
    }

    //
    // General case.
    // First pass: apply reflections of growing size S=2..M to the trailing
    // S rows of A.
    //
    w = new double[n];
    v = new double[m+1];
    hqrnd.hqrndrandomize(state);
    for(s=2; s<=m; s++)
    {
        //
        // Prepare random normal v (1-based, length S); redraw if it is
        // exactly zero.
        //
        do
        {
            i = 1;
            while( i<=s )
            {
                hqrnd.hqrndnormal2(state, ref u1, ref u2);
                v[i] = u1;
                if( i+1<=s )
                {
                    v[i+1] = u2;
                }
                i = i+2;
            }
            lambdav = 0.0;
            for(i_=1; i_<=s;i_++)
            {
                lambdav += v[i_]*v[i_];
            }
        }
        while( (double)(lambdav)==(double)(0) );

        //
        // Prepare and apply reflection
        //
        reflections.generatereflection(ref v, s, ref tau);
        v[1] = 1;
        reflections.applyreflectionfromtheleft(ref a, tau, v, m-s, m-1, 0, n-1, ref w);
    }

    //
    // Second pass: scale every row by 2*hqrnduniformi(state,2)-1.
    //
    for(i=0; i<=m-1; i++)
    {
        tau = 2*hqrnd.hqrnduniformi(state, 2)-1;
        for(i_=0; i_<=n-1;i_++)
        {
            a[i,i_] = tau*a[i,i_];
        }
    }
}


/*************************************************************************
Multiplication of MxN complex matrix by NxN random Haar distributed
complex orthogonal matrix

INPUT PARAMETERS:
    A   -   matrix, array[0..M-1, 0..N-1]
    M, N-   matrix size

OUTPUT PARAMETERS:
    A   -   A*Q, where Q is random NxN orthogonal matrix

  -- ALGLIB routine --
     04.12.2009
     Bochkanov Sergey
*************************************************************************/
public static void cmatrixrndorthogonalfromtheright(ref complex[,] a, int m, int n)
{
    complex lambdav = 0;
    complex tau = 0;
    int s = 0;
    int i = 0;
    complex[] w = new complex[0];
    complex[] v = new complex[0];
    hqrnd.hqrndstate state = new hqrnd.hqrndstate();
    int i_ = 0;

    alglib.ap.assert(n>=1 && m>=1, "CMatrixRndOrthogonalFromTheRight: N<1 or M<1!");
    if( n==1 )
    {
        //
        // Special case: scale the single column by the pair drawn with
        // hqrndunit2
        //
        hqrnd.hqrndrandomize(state);
        hqrnd.hqrndunit2(state, ref tau.x, ref tau.y);
        for(i=0; i<=m-1; i++)
        {
            a[i,0] = a[i,0]*tau;
        }
        return;
    }

    //
    // General case.
    // First pass: apply reflections of growing size S=2..N to the trailing
    // S columns of A.
    //
    w = new complex[m];
    v = new complex[n+1];
    hqrnd.hqrndrandomize(state);
    for(s=2; s<=n; s++)
    {
        //
        // Prepare random normal v (1-based, length S); redraw if it is
        // exactly zero.
        //
        do
        {
            for(i=1; i<=s; i++)
            {
                hqrnd.hqrndnormal2(state, ref tau.x, ref tau.y);
                v[i] = tau;
            }
            lambdav = 0.0;
            for(i_=1; i_<=s;i_++)
            {
                lambdav += v[i_]*math.conj(v[i_]);
            }
        }
        while( lambdav==0 );

        //
        // Prepare and apply reflection
        //
        creflections.complexgeneratereflection(ref v, s, ref tau);
        v[1] = 1;
        creflections.complexapplyreflectionfromtheright(ref a, tau, ref v, 0, m-1, n-s, n-1, ref w);
    }

    //
    // Second pass: scale every column by a value drawn with hqrndunit2.
    //
    for(i=0; i<=n-1; i++)
    {
        hqrnd.hqrndunit2(state, ref tau.x, ref tau.y);
        for(i_=0; i_<=m-1;i_++)
        {
            a[i_,i] = tau*a[i_,i];
        }
    }
}


/*************************************************************************
Multiplication of MxN complex matrix by MxM random Haar distributed
complex orthogonal matrix

INPUT PARAMETERS:
    A   -   matrix, array[0..M-1, 0..N-1]
    M, N-   matrix size

OUTPUT PARAMETERS:
    A   -   Q*A, where Q is random MxM orthogonal matrix

  -- ALGLIB routine --
     04.12.2009
     Bochkanov Sergey
*************************************************************************/
public static void cmatrixrndorthogonalfromtheleft(ref complex[,] a, int m, int n)
{
    complex tau = 0;
    complex lambdav = 0;
    int s = 0;
    int i = 0;
    int j = 0;
    complex[] w = new complex[0];
    complex[] v = new complex[0];
    hqrnd.hqrndstate state = new hqrnd.hqrndstate();
    int i_ = 0;

    // The message names this routine (it used to carry the name of the
    // "FromTheRight" sibling, which misreported the failing call).
    alglib.ap.assert(n>=1 && m>=1, "CMatrixRndOrthogonalFromTheLeft: N<1 or M<1!");
    if( m==1 )
    {
        //
        // special case: scale the single row by the pair drawn with
        // hqrndunit2
        //
        hqrnd.hqrndrandomize(state);
        hqrnd.hqrndunit2(state, ref tau.x, ref tau.y);
        for(j=0; j<=n-1; j++)
        {
            a[0,j] = a[0,j]*tau;
        }
        return;
    }

    //
    // General case.
    // First pass: apply reflections of growing size S=2..M to the trailing
    // S rows of A.
    //
    w = new complex[n];
    v = new complex[m+1];
    hqrnd.hqrndrandomize(state);
    for(s=2; s<=m; s++)
    {
        //
        // Prepare random normal v (1-based, length S); redraw if it is
        // exactly zero.
        //
        do
        {
            for(i=1; i<=s; i++)
            {
                hqrnd.hqrndnormal2(state, ref tau.x, ref tau.y);
                v[i] = tau;
            }
            lambdav = 0.0;
            for(i_=1; i_<=s;i_++)
            {
                lambdav += v[i_]*math.conj(v[i_]);
            }
        }
        while( lambdav==0 );

        //
        // Prepare and apply reflection
        //
        creflections.complexgeneratereflection(ref v, s, ref tau);
        v[1] = 1;
        creflections.complexapplyreflectionfromtheleft(ref a, tau, v, m-s, m-1, 0, n-1, ref w);
    }

    //
    // Second pass: scale every row by a value drawn with hqrndunit2.
    //
    for(i=0; i<=m-1; i++)
    {
        hqrnd.hqrndunit2(state, ref tau.x, ref tau.y);
        for(i_=0; i_<=n-1;i_++)
        {
            a[i,i_] = tau*a[i,i_];
        }
    }
}


/*************************************************************************
Symmetric multiplication of NxN matrix by random Haar distributed
orthogonal matrix

INPUT PARAMETERS:
    A   -   matrix, array[0..N-1, 0..N-1]
    N   -   matrix size

OUTPUT PARAMETERS:
    A   -   Q'*A*Q, where Q is random NxN orthogonal matrix

NOTE: N is not validated here; the work arrays are allocated with sizes N
      and N+1.

  -- ALGLIB routine --
     04.12.2009
     Bochkanov Sergey
*************************************************************************/
public static void smatrixrndmultiply(ref double[,] a, int n)
{
    double tau = 0;
    double lambdav = 0;
    int s = 0;
    int i = 0;
    double u1 = 0;
    double u2 = 0;
    double[] w = new double[0];
    double[] v = new double[0];
    hqrnd.hqrndstate state = new hqrnd.hqrndstate();
    int i_ = 0;

    //
    // General case.
    // First pass: each reflection is applied from the right and then from
    // the left.
    //
    w = new double[n];
    v = new double[n+1];
    hqrnd.hqrndrandomize(state);
    for(s=2; s<=n; s++)
    {
        //
        // Prepare random normal v (1-based, length S); redraw if it is
        // exactly zero.
        //
        do
        {
            i = 1;
            while( i<=s )
            {
                hqrnd.hqrndnormal2(state, ref u1, ref u2);
                v[i] = u1;
                if( i+1<=s )
                {
                    v[i+1] = u2;
                }
                i = i+2;
            }
            lambdav = 0.0;
            for(i_=1; i_<=s;i_++)
            {
                lambdav += v[i_]*v[i_];
            }
        }
        while( (double)(lambdav)==(double)(0) );

        //
        // Prepare and apply reflection
        //
        reflections.generatereflection(ref v, s, ref tau);
        v[1] = 1;
        reflections.applyreflectionfromtheright(ref a, tau, v, 0, n-1, n-s, n-1, ref w);
        reflections.applyreflectionfromtheleft(ref a, tau, v, n-s, n-1, 0, n-1, ref w);
    }

    //
    // Second pass: the same factor 2*hqrnduniformi(state,2)-1 scales
    // column I and then row I.
    //
    for(i=0; i<=n-1; i++)
    {
        tau = 2*hqrnd.hqrnduniformi(state, 2)-1;
        for(i_=0; i_<=n-1;i_++)
        {
            a[i_,i] = tau*a[i_,i];
        }
        for(i_=0; i_<=n-1;i_++)
        {
            a[i,i_] = tau*a[i,i_];
        }
    }

    //
    // Copy upper triangle to lower
    //
    for(i=0; i<=n-2; i++)
    {
        for(i_=i+1; i_<=n-1;i_++)
        {
            a[i_,i] = a[i,i_];
        }
    }
}


/*************************************************************************
Hermitian multiplication of NxN matrix by random Haar distributed
complex orthogonal matrix

INPUT PARAMETERS:
    A   -   matrix, array[0..N-1, 0..N-1]
    N   -   matrix size

OUTPUT PARAMETERS:
    A   -   Q^H*A*Q, where Q is random NxN orthogonal matrix

NOTE: N is not validated here; the work arrays are allocated with sizes N
      and N+1.

  -- ALGLIB routine --
     04.12.2009
     Bochkanov Sergey
*************************************************************************/
public static void hmatrixrndmultiply(ref complex[,] a, int n)
{
    complex tau = 0;
    complex lambdav = 0;
    int s = 0;
    int i = 0;
    complex[] w = new complex[0];
    complex[] v = new complex[0];
    hqrnd.hqrndstate state = new hqrnd.hqrndstate();
    int i_ = 0;

    //
    // General case.
    // First pass: each reflection is applied from the right with TAU and
    // from the left with conj(TAU).
    //
    w = new complex[n];
    v = new complex[n+1];
    hqrnd.hqrndrandomize(state);
    for(s=2; s<=n; s++)
    {
        //
        // Prepare random normal v (1-based, length S); redraw if it is
        // exactly zero.
        //
        do
        {
            for(i=1; i<=s; i++)
            {
                hqrnd.hqrndnormal2(state, ref tau.x, ref tau.y);
                v[i] = tau;
            }
            lambdav = 0.0;
            for(i_=1; i_<=s;i_++)
            {
                lambdav += v[i_]*math.conj(v[i_]);
            }
        }
        while( lambdav==0 );

        //
        // Prepare and apply reflection
        //
        creflections.complexgeneratereflection(ref v, s, ref tau);
        v[1] = 1;
        creflections.complexapplyreflectionfromtheright(ref a, tau, ref v, 0, n-1, n-s, n-1, ref w);
        creflections.complexapplyreflectionfromtheleft(ref a, math.conj(tau), v, n-s, n-1, 0, n-1, ref w);
    }

    //
    // Second pass: column I is scaled by TAU, row I by conj(TAU).
    //
    for(i=0; i<=n-1; i++)
    {
        hqrnd.hqrndunit2(state, ref tau.x, ref tau.y);
        for(i_=0; i_<=n-1;i_++)
        {
            a[i_,i] = tau*a[i_,i];
        }
        tau = math.conj(tau);
        for(i_=0; i_<=n-1;i_++)
        {
            a[i,i_] = tau*a[i,i_];
        }
    }

    //
    // Replace all values in the lower triangle by the complex conjugates
    // of the corresponding values from the upper one: copy first, then
    // flip the sign of the imaginary parts.
    //
    for(i=0; i<=n-2; i++)
    {
        for(i_=i+1; i_<=n-1;i_++)
        {
            a[i_,i] = a[i,i_];
        }
    }
    for(s=0; s<=n-2; s++)
    {
        for(i=s+1; i<=n-1; i++)
        {
            a[i,s].y = -a[i,s].y;
        }
    }
}


}
public class sparse
{
/*************************************************************************
Sparse matrix structure.

You should use ALGLIB functions to work with sparse matrix. Never  try  to
access its fields directly!

NOTES ON THE SPARSE STORAGE FORMATS

Sparse matrices can be stored using several formats:
* Hash-Table representation
* Compressed Row Storage (CRS)
* Skyline matrix storage (SKS)

Each of the formats has benefits and drawbacks:
* Hash-table is good for dynamic operations (insertion of new elements),
  but does not support linear algebra operations
* CRS is good for operations like matrix-vector or matrix-matrix products,
  but its initialization is less convenient - you have to tell row  sizes
  at the initialization, and you have to fill  matrix  only  row  by  row,
  from left to right.
* SKS is a special format which is used to store triangular  factors  from
  Cholesky factorization.
It does not support dynamic modification, and support for linear algebra
operations is very limited.

The table below shows which operations each storage format supports:

    OPERATIONS WITH MATRIX      HASH        CRS         SKS
    creation                    +           +           +
    SparseGet                   +           +           +
    SparseRewriteExisting       +           +           +
    SparseSet                   +
    SparseAdd                   +
    SparseGetRow                            +           +
    SparseGetCompressedRow                  +           +
    sparse-dense linear algebra             +           +

(SparseSet is additionally accepted by CRS matrices, but only while they are
being filled right after creation.)

NOTE(review): the column alignment of this table was lost in this copy of the
file. Rows with three marks are unambiguous; HASH for SparseSet/SparseAdd and
CRS+SKS for linear algebra follow the per-function comments of this unit; the
CRS+SKS placement of SparseGetRow/SparseGetCompressedRow is an assumption -
confirm against the upstream ALGLIB sources.
        *************************************************************************/
        public class sparsematrix : apobject
        {
            // Element values. Hash mode: one slot per table entry; CRS/SKS: packed
            // row by row.
            public double[] vals;
            // Hash mode: pairs (idx[2*h], idx[2*h+1]) = (row, column) of slot h, with
            // -1 in the row position marking a free slot and -2 a deleted one.
            // CRS mode: column index of every stored element.
            public int[] idx;
            // CRS/SKS: ridx[i] is the offset of row/block i in vals (length M+1).
            public int[] ridx;
            // SKS: sub-diagonal bandwidths plus their maximum in the last entry.
            // CRS: read by SparseGetDiagonal() to locate the diagonal element.
            public int[] didx;
            // SKS: super-diagonal bandwidths plus their maximum in the last entry.
            // CRS: compared with didx[] by SparseGetDiagonal().
            public int[] uidx;
            // 0 = Hash-Table, 1 = CRS, 2 = SKS.
            public int matrixtype;
            // Number of rows.
            public int m;
            // Number of columns.
            public int n;
            // Hash mode: number of never-used slots left in the table.
            public int nfree;
            // CRS mode: number of elements written so far via SparseSet().
            public int ninitialized;
            // Hash mode: number of slots in the table.
            public int tablesize;

            public sparsematrix()
            {
                init();
            }

            // Resets every array field to an empty array; scalar fields are left
            // untouched.
            public override void init()
            {
                this.vals = new double[0];
                this.idx = new int[0];
                this.ridx = new int[0];
                this.didx = new int[0];
                this.uidx = new int[0];
            }

            // Returns an independent copy: scalars are copied by value and every
            // array is cloned.
            public override alglib.apobject make_copy()
            {
                sparsematrix copy = new sparsematrix();

                copy.matrixtype = this.matrixtype;
                copy.m = this.m;
                copy.n = this.n;
                copy.nfree = this.nfree;
                copy.ninitialized = this.ninitialized;
                copy.tablesize = this.tablesize;

                copy.vals = (double[])this.vals.Clone();
                copy.idx = (int[])this.idx.Clone();
                copy.ridx = (int[])this.ridx.Clone();
                copy.didx = (int[])this.didx.Clone();
                copy.uidx = (int[])this.uidx.Clone();

                return copy;
            }
        };


        /*************************************************************************
        Temporary buffers for sparse matrix operations.

        You should pass an instance of this structure to factorization functions.
        It allows memory to be reused during repeated sparse factorizations. You
        do not have to call any initialization function - simply passing an
        instance to the factorization function is enough.
*************************************************************************/
        public class sparsebuffers : apobject
        {
            // Integer work arrays and a scratch sparse matrix; all start out empty.
            public int[] d;
            public int[] u;
            public sparsematrix s;

            public sparsebuffers()
            {
                init();
            }

            // Replaces every buffer with a fresh empty one.
            public override void init()
            {
                this.d = new int[0];
                this.u = new int[0];
                this.s = new sparsematrix();
            }

            // Returns an independent copy (arrays cloned, nested matrix copied via
            // its own make_copy()).
            public override alglib.apobject make_copy()
            {
                sparsebuffers copy = new sparsebuffers();

                copy.d = (int[])this.d.Clone();
                copy.u = (int[])this.u.Clone();
                copy.s = (sparsematrix)this.s.make_copy();

                return copy;
            }
        };


        // Initial hash table size is K/desiredloadfactor+additional slots.
        public const double desiredloadfactor = 0.66;
        // The table is resized once (1-maxloadfactor)*tablesize >= number of free slots.
        public const double maxloadfactor = 0.75;
        // NOTE(review): presumably the table growth multiplier on resize - its use
        // is not visible in this part of the file.
        public const double growfactor = 2.00;
        // Extra slots always added to a freshly sized hash table.
        public const int additional = 10;
        // NOTE(review): presumably a size threshold for switching linear algebra
        // code paths - its use is not visible in this part of the file.
        public const int linalgswitch = 16;


        /*************************************************************************
        This function creates sparse matrix in a Hash-Table format.

        This function creates Hash-Table matrix, which can be converted to CRS
        format after its initialization is over. Typical usage scenario for a
        sparse matrix is:
        1. creation in a Hash-Table format
        2. insertion of the matrix elements
        3. conversion to the CRS representation
        4. matrix is passed to some linear algebra algorithm

        Some information about different matrix formats can be found below, in
        the "NOTES" section.

        INPUT PARAMETERS
            M           -   number of rows in a matrix, M>=1
            N           -   number of columns in a matrix, N>=1
            K           -   K>=0, expected number of non-zero elements in a matrix.
                            K can be inexact approximation, can be less than actual
                            number of elements (table will grow when needed) or
                            even zero.
                            It is important to understand that although hash-table
                            may grow automatically, it is better to provide good
                            estimate of data size.

        OUTPUT PARAMETERS
            S           -   sparse M*N matrix in Hash-Table representation.
                            All elements of the matrix are zero.

        NOTE 1

        Hash-tables use memory inefficiently, and they have to keep some amount
        of the "spare memory" in order to have good performance.
Hash table for matrix with K non-zero elements will need C*K*(8+2*sizeof(int))
bytes, where C is a small constant, about 1.5-2 in magnitude.

CRS storage, on the other hand, is more memory-efficient, and needs just
K*(8+sizeof(int))+M*sizeof(int) bytes, where M is a number of rows in a
matrix.

When you convert from the Hash-Table to CRS representation, all unneeded
memory will be freed.

NOTE 2

Comments of SparseMatrix structure outline information about different
sparse storage formats. We recommend you to read them before starting to
use ALGLIB sparse matrices.

NOTE 3

This function completely overwrites S with new sparse matrix. Previously
allocated storage is NOT reused. If you want to reuse already allocated
memory, call SparseCreateBuf function.

NOTE(review): the body below forwards straight to SparseCreateBuf(), which
does reuse arrays already held by S; the "storage is NOT reused" guarantee
therefore presumably relies on the caller passing a freshly constructed S -
confirm against the public wrapper.

  -- ALGLIB PROJECT --
     Copyright 14.10.2011 by Bochkanov Sergey
        *************************************************************************/
        public static void sparsecreate(int m, int n, int k, sparsematrix s)
        {
            // All argument checks and the table setup live in the buffered version.
            sparsecreatebuf(m, n, k, s);
        }


        /*************************************************************************
        This version of SparseCreate function creates sparse matrix in Hash-Table
        format, reusing previously allocated storage as much as possible. Read
        comments for SparseCreate() for more information.

        INPUT PARAMETERS
            M           -   number of rows in a matrix, M>=1
            N           -   number of columns in a matrix, N>=1
            K           -   K>=0, expected number of non-zero elements in a matrix.
                            K can be inexact approximation, can be less than actual
                            number of elements (table will grow when needed) or
                            even zero.
                            It is important to understand that although hash-table
                            may grow automatically, it is better to provide good
                            estimate of data size.
            S           -   SparseMatrix structure which MAY contain some already
                            allocated storage.

        OUTPUT PARAMETERS
            S           -   sparse M*N matrix in Hash-Table representation.
                            All elements of the matrix are zero.
                            Previously allocated storage is reused, if its size
                            is compatible with expected number of non-zeros K.
-- ALGLIB PROJECT --
     Copyright 14.01.2014 by Bochkanov Sergey
        *************************************************************************/
        public static void sparsecreatebuf(int m, int n, int k, sparsematrix s)
        {
            alglib.ap.assert(m>0, "SparseCreateBuf: M<=0");
            alglib.ap.assert(n>0, "SparseCreateBuf: N<=0");
            alglib.ap.assert(k>=0, "SparseCreateBuf: K<0");

            //
            // The table gets max(existing_size, requested_size) slots.
            //
            // All memory already owned by S is put to use: a hash table cannot be
            // grown efficiently without temporary storage, so a buffer that once
            // held a large table must keep serving as a large table - otherwise
            // reusing the buffer would bring no benefit.
            //
            s.tablesize = (int)Math.Round(k/desiredloadfactor+additional);
            apserv.rvectorsetlengthatleast(ref s.vals, s.tablesize);
            int slots = alglib.ap.len(s.vals);
            s.tablesize = slots;

            //
            // Remaining descriptor fields: Hash-Table type, dimensions, and an
            // entirely free table.
            //
            s.matrixtype = 0;
            s.m = m;
            s.n = n;
            s.nfree = slots;

            //
            // Two index entries per slot; -1 in the first one marks the slot as
            // free. The second entry is left as is.
            //
            apserv.ivectorsetlengthatleast(ref s.idx, 2*slots);
            int pos = 0;
            while( pos<2*slots )
            {
                s.idx[pos] = -1;
                pos += 2;
            }
        }


        /*************************************************************************
        This function creates sparse matrix in a CRS format (expert function for
        situations when you are running out of memory).

        This function creates CRS matrix. Typical usage scenario for a CRS matrix
        is:
        1. creation (you have to tell number of non-zero elements at each row at
           this moment)
        2. insertion of the matrix elements (row by row, from left to right)
        3. matrix is passed to some linear algebra algorithm

        This function is a memory-efficient alternative to SparseCreate(), but it
        is more complex because it requires you to know in advance how large your
        matrix is. Some information about different matrix formats can be found
        in comments on SparseMatrix structure. We recommend you to read them
        before starting to use ALGLIB sparse matrices.
INPUT PARAMETERS
    M           -   number of rows in a matrix, M>=1
    N           -   number of columns in a matrix, N>=1
    NER         -   number of elements at each row, array[M], NER[I]>=0

OUTPUT PARAMETERS
    S           -   sparse M*N matrix in CRS representation.
                    You have to fill ALL non-zero elements by calling
                    SparseSet() BEFORE you try to use this matrix.

NOTE: this function completely overwrites S with new sparse matrix.
      Previously allocated storage is NOT reused. If you want to reuse
      already allocated memory, call SparseCreateCRSBuf function.

NOTE(review): the body below validates its arguments and then forwards to
SparseCreateCRSBuf(); the "storage is NOT reused" guarantee therefore
presumably relies on the caller passing a freshly constructed S - confirm
against the public wrapper.

  -- ALGLIB PROJECT --
     Copyright 14.10.2011 by Bochkanov Sergey
        *************************************************************************/
        public static void sparsecreatecrs(int m, int n, int[] ner, sparsematrix s)
        {
            int i = 0;

            alglib.ap.assert(m>0, "SparseCreateCRS: M<=0");
            alglib.ap.assert(n>0, "SparseCreateCRS: N<=0");
            alglib.ap.assert(alglib.ap.len(ner)>=m, "SparseCreateCRS: Length(NER)<M");

            // Every per-row element count must be non-negative; only the first M
            // entries of NER are examined.
            for(i=0; i<=m-1; i++)
            {
                alglib.ap.assert(ner[i]>=0, "SparseCreateCRS: NER[] contains negative elements");
            }

            // The buffered version performs the actual setup.
            sparsecreatecrsbuf(m, n, ner, s);
        }


        /*************************************************************************
        This function creates sparse matrix in a CRS format (expert function for
        situations when you are running out of memory). This version of CRS
        matrix creation function may reuse memory already allocated in S.

        This function creates CRS matrix. Typical usage scenario for a CRS matrix
        is:
        1. creation (you have to tell number of non-zero elements at each row at
           this moment)
        2. insertion of the matrix elements (row by row, from left to right)
        3. matrix is passed to some linear algebra algorithm

        This function is a memory-efficient alternative to SparseCreate(), but it
        is more complex because it requires you to know in advance how large your
        matrix is. Some information about different matrix formats can be found
        in comments on SparseMatrix structure. We recommend you to read them
        before starting to use ALGLIB sparse matrices.
INPUT PARAMETERS
    M           -   number of rows in a matrix, M>=1
    N           -   number of columns in a matrix, N>=1
    NER         -   number of elements at each row, array[M], NER[I]>=0
    S           -   sparse matrix structure with possibly preallocated
                    memory.

OUTPUT PARAMETERS
    S           -   sparse M*N matrix in CRS representation.
                    You have to fill ALL non-zero elements by calling
                    SparseSet() BEFORE you try to use this matrix.

  -- ALGLIB PROJECT --
     Copyright 14.10.2011 by Bochkanov Sergey
        *************************************************************************/
        public static void sparsecreatecrsbuf(int m, int n, int[] ner, sparsematrix s)
        {
            int i = 0;
            int noe = 0;

            alglib.ap.assert(m>0, "SparseCreateCRSBuf: M<=0");
            alglib.ap.assert(n>0, "SparseCreateCRSBuf: N<=0");
            alglib.ap.assert(alglib.ap.len(ner)>=m, "SparseCreateCRSBuf: Length(NER)<M");

            //
            // NOTE(review): the statements from here up to the assertion inside
            // the loop were missing from this copy of the file and have been
            // reconstructed from the parallel SKS routine and from the way
            // SparseSet()/SparseGet() read these fields - verify against the
            // upstream ALGLIB sources.
            //
            // Descriptor: CRS type (1), nothing initialized yet, dimensions.
            //
            noe = 0;
            s.matrixtype = 1;
            s.ninitialized = 0;
            s.m = m;
            s.n = n;

            //
            // Row offsets: ridx[i] is the position of the first element of row i
            // in vals/idx, ridx[m] is the total number of elements.
            //
            apserv.ivectorsetlengthatleast(ref s.ridx, s.m+1);
            s.ridx[0] = 0;
            for(i=0; i<=s.m-1; i++)
            {
                alglib.ap.assert(ner[i]>=0, "SparseCreateCRSBuf: NER[] contains negative elements");
                noe = noe+ner[i];
                s.ridx[i+1] = s.ridx[i]+ner[i];
            }

            //
            // Storage for values and column indexes, reusing existing arrays
            // when they are large enough.
            //
            apserv.rvectorsetlengthatleast(ref s.vals, noe);
            apserv.ivectorsetlengthatleast(ref s.idx, noe);

            //
            // A matrix without elements will never receive a SparseSet() call,
            // so its diagonal/upper indexes have to be prepared right here.
            //
            if( noe==0 )
            {
                sparseinitduidx(s);
            }
        }


        /*************************************************************************
        This function creates sparse matrix in a SKS format (skyline storage
        format). In most cases you do not need this function - CRS format better
        suits most use cases.

        INPUT PARAMETERS
            M, N        -   number of rows(M) and columns (N) in a matrix:
                            * M=N (as for now, ALGLIB supports only square SKS)
                            * N>=1
                            * M>=1
            D           -   "bottom" bandwidths, array[M], 0<=D[I]<=I.
                            I-th element stores number of non-zeros at I-th row,
                            below the diagonal (diagonal itself is not included)
            U           -   "top" bandwidths, array[N], 0<=U[I]<=I.
                            I-th element stores number of non-zeros at I-th
                            column, above the diagonal (diagonal itself is not
                            included)

        OUTPUT PARAMETERS
            S           -   sparse M*N matrix in SKS representation.
                            All elements are filled by zeros.
                            You may use SparseRewriteExisting() to change their
                            values.

        NOTE: this function completely overwrites S with new sparse matrix.
              Previously allocated storage is NOT reused.
If you want to reuse already allocated memory, call SparseCreateSKSBuf
function.

NOTE(review): the body below validates its arguments and then forwards to
SparseCreateSKSBuf(); the "storage is NOT reused" guarantee therefore
presumably relies on the caller passing a freshly constructed S - confirm
against the public wrapper.

  -- ALGLIB PROJECT --
     Copyright 13.01.2014 by Bochkanov Sergey
        *************************************************************************/
        public static void sparsecreatesks(int m, int n, int[] d, int[] u, sparsematrix s)
        {
            int i = 0;

            alglib.ap.assert(m>0, "SparseCreateSKS: M<=0");
            alglib.ap.assert(n>0, "SparseCreateSKS: N<=0");
            alglib.ap.assert(m==n, "SparseCreateSKS: M<>N");
            alglib.ap.assert(alglib.ap.len(d)>=m, "SparseCreateSKS: Length(D)<M");
            alglib.ap.assert(alglib.ap.len(u)>=n, "SparseCreateSKS: Length(U)<N");

            // A bandwidth can never reach past the first row/column, hence the
            // upper bound I on the I-th entry.
            for(i=0; i<=m-1; i++)
            {
                alglib.ap.assert(d[i]>=0, "SparseCreateSKS: D[] contains negative elements");
                alglib.ap.assert(d[i]<=i, "SparseCreateSKS: D[I]>I for some I");
            }
            for(i=0; i<=n-1; i++)
            {
                alglib.ap.assert(u[i]>=0, "SparseCreateSKS: U[] contains negative elements");
                alglib.ap.assert(u[i]<=i, "SparseCreateSKS: U[I]>I for some I");
            }

            // The buffered version performs the actual setup.
            sparsecreatesksbuf(m, n, d, u, s);
        }


        /*************************************************************************
        This is "buffered" version of SparseCreateSKS() which reuses memory
        previously allocated in S (of course, memory is reallocated if needed).

        This function creates sparse matrix in a SKS format (skyline storage
        format). In most cases you do not need this function - CRS format better
        suits most use cases.

        INPUT PARAMETERS
            M, N        -   number of rows(M) and columns (N) in a matrix:
                            * M=N (as for now, ALGLIB supports only square SKS)
                            * N>=1
                            * M>=1
            D           -   "bottom" bandwidths, array[M], 0<=D[I]<=I.
                            I-th element stores number of non-zeros at I-th row,
                            below the diagonal (diagonal itself is not included)
            U           -   "top" bandwidths, array[N], 0<=U[I]<=I.
                            I-th element stores number of non-zeros at I-th
                            column, above the diagonal (diagonal itself is not
                            included)

        OUTPUT PARAMETERS
            S           -   sparse M*N matrix in SKS representation.
                            All elements are filled by zeros.
                            You may use SparseRewriteExisting() to change their
                            values (SparseSet() and SparseAdd() do not support
                            SKS matrices).
-- ALGLIB PROJECT --
     Copyright 13.01.2014 by Bochkanov Sergey
        *************************************************************************/
        public static void sparsecreatesksbuf(int m, int n, int[] d, int[] u, sparsematrix s)
        {
            int i = 0;
            int minmn = 0;
            int nz = 0;
            int mxd = 0;
            int mxu = 0;

            alglib.ap.assert(m>0, "SparseCreateSKSBuf: M<=0");
            alglib.ap.assert(n>0, "SparseCreateSKSBuf: N<=0");
            alglib.ap.assert(m==n, "SparseCreateSKSBuf: M<>N");
            alglib.ap.assert(alglib.ap.len(d)>=m, "SparseCreateSKSBuf: Length(D)<M");
            alglib.ap.assert(alglib.ap.len(u)>=n, "SparseCreateSKSBuf: Length(U)<N");

            // A bandwidth can never reach past the first row/column, hence the
            // upper bound I on the I-th entry.
            for(i=0; i<=m-1; i++)
            {
                alglib.ap.assert(d[i]>=0, "SparseCreateSKSBuf: D[] contains negative elements");
                alglib.ap.assert(d[i]<=i, "SparseCreateSKSBuf: D[I]>I for some I");
            }
            for(i=0; i<=n-1; i++)
            {
                alglib.ap.assert(u[i]>=0, "SparseCreateSKSBuf: U[] contains negative elements");
                alglib.ap.assert(u[i]<=i, "SparseCreateSKSBuf: U[I]>I for some I");
            }

            //
            // Descriptor: SKS type (2), dimensions.
            //
            minmn = Math.Min(m, n);
            s.matrixtype = 2;
            s.ninitialized = 0;
            s.m = m;
            s.n = n;

            //
            // Block offsets: block i holds D[i] sub-diagonal elements, the
            // diagonal element and U[i] super-diagonal elements, in this order.
            //
            apserv.ivectorsetlengthatleast(ref s.ridx, minmn+1);
            s.ridx[0] = 0;
            nz = 0;
            for(i=0; i<=minmn-1; i++)
            {
                nz = nz+1+d[i]+u[i];
                s.ridx[i+1] = s.ridx[i]+1+d[i]+u[i];
            }

            //
            // Zero-filled value storage (the array may be reused, so it is
            // cleared explicitly).
            //
            apserv.rvectorsetlengthatleast(ref s.vals, nz);
            for(i=0; i<=nz-1; i++)
            {
                s.vals[i] = 0.0;
            }

            //
            // Copies of the bandwidth arrays; one extra trailing entry keeps the
            // maximum bandwidth.
            //
            apserv.ivectorsetlengthatleast(ref s.didx, m+1);
            mxd = 0;
            for(i=0; i<=m-1; i++)
            {
                s.didx[i] = d[i];
                mxd = Math.Max(mxd, d[i]);
            }
            s.didx[m] = mxd;
            apserv.ivectorsetlengthatleast(ref s.uidx, n+1);
            mxu = 0;
            for(i=0; i<=n-1; i++)
            {
                s.uidx[i] = u[i];
                mxu = Math.Max(mxu, u[i]);
            }
            s.uidx[n] = mxu;
        }


        /*************************************************************************
        This function copies S0 to S1.
        This function completely deallocates memory owned by S1 before creating a
        copy of S0. If you want to reuse memory, use SparseCopyBuf.

        NOTE:  this  function  does  not verify its arguments, it just copies all
               fields of the structure.
-- ALGLIB PROJECT --
     Copyright 14.10.2011 by Bochkanov Sergey
        *************************************************************************/
        public static void sparsecopy(sparsematrix s0, sparsematrix s1)
        {
            // NOTE(review): this forwards to the buffer-reusing version, so any
            // deallocation of S1 promised above presumably happens in the caller -
            // confirm against the public wrapper.
            sparsecopybuf(s0, s1);
        }


        /*************************************************************************
        This function copies S0 to S1.
        Memory already allocated in S1 is reused as much as possible.

        NOTE:  this  function  does  not verify its arguments, it just copies all
               fields of the structure.

          -- ALGLIB PROJECT --
             Copyright 14.10.2011 by Bochkanov Sergey
        *************************************************************************/
        public static void sparsecopybuf(sparsematrix s0, sparsematrix s1)
        {
            //
            // Scalar descriptor fields
            //
            s1.matrixtype = s0.matrixtype;
            s1.m = s0.m;
            s1.n = s0.n;
            s1.nfree = s0.nfree;
            s1.ninitialized = s0.ninitialized;
            s1.tablesize = s0.tablesize;

            //
            // Main storage: values, row offsets, indexes. Each destination array
            // is grown only when it is shorter than the source; the full source
            // array is then copied element by element.
            //
            int count = alglib.ap.len(s0.vals);
            apserv.rvectorsetlengthatleast(ref s1.vals, count);
            for(int pos=0; pos<count; pos++)
            {
                s1.vals[pos] = s0.vals[pos];
            }

            count = alglib.ap.len(s0.ridx);
            apserv.ivectorsetlengthatleast(ref s1.ridx, count);
            for(int pos=0; pos<count; pos++)
            {
                s1.ridx[pos] = s0.ridx[pos];
            }

            count = alglib.ap.len(s0.idx);
            apserv.ivectorsetlengthatleast(ref s1.idx, count);
            for(int pos=0; pos<count; pos++)
            {
                s1.idx[pos] = s0.idx[pos];
            }

            //
            // Upper/diagonal index arrays (read by the CRS and SKS code paths)
            //
            count = alglib.ap.len(s0.uidx);
            apserv.ivectorsetlengthatleast(ref s1.uidx, count);
            for(int pos=0; pos<count; pos++)
            {
                s1.uidx[pos] = s0.uidx[pos];
            }

            count = alglib.ap.len(s0.didx);
            apserv.ivectorsetlengthatleast(ref s1.didx, count);
            for(int pos=0; pos<count; pos++)
            {
                s1.didx[pos] = s0.didx[pos];
            }
        }


        /*************************************************************************
        This function efficiently swaps contents of S0 and S1.
-- ALGLIB PROJECT -- Copyright 16.01.2014 by Bochkanov Sergey *************************************************************************/ public static void sparseswap(sparsematrix s0, sparsematrix s1) { apserv.swapi(ref s1.matrixtype, ref s0.matrixtype); apserv.swapi(ref s1.m, ref s0.m); apserv.swapi(ref s1.n, ref s0.n); apserv.swapi(ref s1.nfree, ref s0.nfree); apserv.swapi(ref s1.ninitialized, ref s0.ninitialized); apserv.swapi(ref s1.tablesize, ref s0.tablesize); alglib.ap.swap(ref s1.vals, ref s0.vals); alglib.ap.swap(ref s1.ridx, ref s0.ridx); alglib.ap.swap(ref s1.idx, ref s0.idx); alglib.ap.swap(ref s1.uidx, ref s0.uidx); alglib.ap.swap(ref s1.didx, ref s0.didx); } /************************************************************************* This function adds value to S[i,j] - element of the sparse matrix. Matrix must be in a Hash-Table mode. In case S[i,j] already exists in the table, V i added to its value. In case S[i,j] is non-existent, it is inserted in the table. Table automatically grows when necessary. INPUT PARAMETERS S - sparse M*N matrix in Hash-Table representation. Exception will be thrown for CRS matrix. I - row index of the element to modify, 0<=I=0, "SparseAdd: I<0"); alglib.ap.assert(i=M"); alglib.ap.assert(j>=0, "SparseAdd: J<0"); alglib.ap.assert(j=N"); alglib.ap.assert(math.isfinite(v), "SparseAdd: V is not finite number"); if( (double)(v)==(double)(0) ) { return; } tcode = -1; k = s.tablesize; if( (double)((1-maxloadfactor)*k)>=(double)(s.nfree) ) { sparseresizematrix(s); k = s.tablesize; } hashcode = hash(i, j, k); while( true ) { if( s.idx[2*hashcode]==-1 ) { if( tcode!=-1 ) { hashcode = tcode; } s.vals[hashcode] = v; s.idx[2*hashcode] = i; s.idx[2*hashcode+1] = j; if( tcode==-1 ) { s.nfree = s.nfree-1; } return; } else { if( s.idx[2*hashcode]==i && s.idx[2*hashcode+1]==j ) { s.vals[hashcode] = s.vals[hashcode]+v; if( (double)(s.vals[hashcode])==(double)(0) ) { s.idx[2*hashcode] = -2; } return; } // // Is it deleted element? 
// if( tcode==-1 && s.idx[2*hashcode]==-2 ) { tcode = hashcode; } // // Next step // hashcode = (hashcode+1)%k; } } } /************************************************************************* This function modifies S[i,j] - element of the sparse matrix. For Hash-based storage format: * this function can be called at any moment - during matrix initialization or later * new value can be zero or non-zero. In case new value of S[i,j] is zero, this element is deleted from the table. * this function has no effect when called with zero V for non-existent element. For CRS-bases storage format: * this function can be called ONLY DURING MATRIX INITIALIZATION * new value MUST be non-zero. Exception will be thrown for zero V. * elements must be initialized in correct order - from top row to bottom, within row - from left to right. For SKS storage: NOT SUPPORTED! Use SparseRewriteExisting() to work with SKS matrices. INPUT PARAMETERS S - sparse M*N matrix in Hash-Table or CRS representation. I - row index of the element to modify, 0<=I=0, "SparseSet: I<0"); alglib.ap.assert(i=M"); alglib.ap.assert(j>=0, "SparseSet: J<0"); alglib.ap.assert(j=N"); alglib.ap.assert(math.isfinite(v), "SparseSet: V is not finite number"); // // Hash-table matrix // if( s.matrixtype==0 ) { tcode = -1; k = s.tablesize; if( (double)((1-maxloadfactor)*k)>=(double)(s.nfree) ) { sparseresizematrix(s); k = s.tablesize; } hashcode = hash(i, j, k); while( true ) { if( s.idx[2*hashcode]==-1 ) { if( (double)(v)!=(double)(0) ) { if( tcode!=-1 ) { hashcode = tcode; } s.vals[hashcode] = v; s.idx[2*hashcode] = i; s.idx[2*hashcode+1] = j; if( tcode==-1 ) { s.nfree = s.nfree-1; } } return; } else { if( s.idx[2*hashcode]==i && s.idx[2*hashcode+1]==j ) { if( (double)(v)==(double)(0) ) { s.idx[2*hashcode] = -2; } else { s.vals[hashcode] = v; } return; } if( tcode==-1 && s.idx[2*hashcode]==-2 ) { tcode = hashcode; } // // Next step // hashcode = (hashcode+1)%k; } } } // // CRS matrix // if( s.matrixtype==1 ) { 
alglib.ap.assert((double)(v)!=(double)(0), "SparseSet: CRS format does not allow you to write zero elements"); alglib.ap.assert(s.ridx[i]<=s.ninitialized, "SparseSet: too few initialized elements at some row (you have promised more when called SparceCreateCRS)"); alglib.ap.assert(s.ridx[i+1]>s.ninitialized, "SparseSet: too many initialized elements at some row (you have promised less when called SparceCreateCRS)"); alglib.ap.assert(s.ninitialized==s.ridx[i] || s.idx[s.ninitialized-1]=0, "SparseGet: I<0"); alglib.ap.assert(i=M"); alglib.ap.assert(j>=0, "SparseGet: J<0"); alglib.ap.assert(j=N"); result = 0.0; if( s.matrixtype==0 ) { // // Hash-based storage // result = 0; k = s.tablesize; hashcode = hash(i, j, k); while( true ) { if( s.idx[2*hashcode]==-1 ) { return result; } if( s.idx[2*hashcode]==i && s.idx[2*hashcode+1]==j ) { result = s.vals[hashcode]; return result; } hashcode = (hashcode+1)%k; } } if( s.matrixtype==1 ) { // // CRS // alglib.ap.assert(s.ninitialized==s.ridx[s.m], "SparseGet: some rows/elements of the CRS matrix were not initialized (you must initialize everything you promised to SparseCreateCRS)"); k0 = s.ridx[i]; k1 = s.ridx[i+1]-1; result = 0; while( k0<=k1 ) { k = (k0+k1)/2; if( s.idx[k]==j ) { result = s.vals[k]; return result; } if( s.idx[k]=0, "SparseGetDiagonal: I<0"); alglib.ap.assert(i=M"); alglib.ap.assert(i=N"); result = 0; if( s.matrixtype==0 ) { result = sparseget(s, i, i); return result; } if( s.matrixtype==1 ) { if( s.didx[i]!=s.uidx[i] ) { result = s.vals[s.didx[i]]; } return result; } if( s.matrixtype==2 ) { alglib.ap.assert(s.m==s.n, "SparseGetDiagonal: non-square SKS matrix not supported"); result = s.vals[s.ridx[i]+s.didx[i]]; return result; } alglib.ap.assert(false, "SparseGetDiagonal: unexpected matrix type"); return result; } /************************************************************************* This function calculates matrix-vector product S*x. 
Matrix S must be stored in CRS or SKS format (exception will be thrown otherwise). INPUT PARAMETERS S - sparse M*N matrix in CRS or SKS format. X - array[N], input vector. For performance reasons we make only quick checks - we check that array size is at least N, but we do not check for NAN's or INF's. Y - output buffer, possibly preallocated. In case buffer size is too small to store result, this buffer is automatically resized. OUTPUT PARAMETERS Y - array[M], S*x NOTE: this function throws exception when called for non-CRS/SKS matrix. You must convert your matrix with SparseConvertToCRS/SKS() before using this function. -- ALGLIB PROJECT -- Copyright 14.10.2011 by Bochkanov Sergey *************************************************************************/ public static void sparsemv(sparsematrix s, double[] x, ref double[] y) { double tval = 0; double v = 0; double vv = 0; int i = 0; int j = 0; int lt = 0; int rt = 0; int lt1 = 0; int rt1 = 0; int n = 0; int m = 0; int d = 0; int u = 0; int ri = 0; int ri1 = 0; int i_ = 0; int i1_ = 0; alglib.ap.assert(alglib.ap.len(x)>=s.n, "SparseMV: length(X)0 ) { lt = ri; rt = ri+d-1; lt1 = i-d; rt1 = i-1; i1_ = (lt1)-(lt); vv = 0.0; for(i_=lt; i_<=rt;i_++) { vv += s.vals[i_]*x[i_+i1_]; } v = v+vv; } y[i] = v; if( u>0 ) { lt = ri1-u; rt = ri1-1; lt1 = i-u; rt1 = i-1; v = x[i]; i1_ = (lt) - (lt1); for(i_=lt1; i_<=rt1;i_++) { y[i_] = y[i_] + v*s.vals[i_+i1_]; } } } return; } } /************************************************************************* This function calculates matrix-vector product S^T*x. Matrix S must be stored in CRS or SKS format (exception will be thrown otherwise). INPUT PARAMETERS S - sparse M*N matrix in CRS or SKS format. X - array[M], input vector. For performance reasons we make only quick checks - we check that array size is at least M, but we do not check for NAN's or INF's. Y - output buffer, possibly preallocated. In case buffer size is too small to store result, this buffer is automatically resized. 
OUTPUT PARAMETERS Y - array[N], S^T*x NOTE: this function throws exception when called for non-CRS/SKS matrix. You must convert your matrix with SparseConvertToCRS/SKS() before using this function. -- ALGLIB PROJECT -- Copyright 14.10.2011 by Bochkanov Sergey *************************************************************************/ public static void sparsemtv(sparsematrix s, double[] x, ref double[] y) { int i = 0; int j = 0; int lt = 0; int rt = 0; int ct = 0; int lt1 = 0; int rt1 = 0; double v = 0; double vv = 0; int n = 0; int m = 0; int ri = 0; int ri1 = 0; int d = 0; int u = 0; int i_ = 0; int i1_ = 0; alglib.ap.assert(s.matrixtype==1 || s.matrixtype==2, "SparseMTV: incorrect matrix type (convert your matrix to CRS/SKS)"); alglib.ap.assert(alglib.ap.len(x)>=s.m, "SparseMTV: Length(X)0 ) { lt = ri; rt = ri+d-1; lt1 = i-d; rt1 = i-1; v = x[i]; i1_ = (lt) - (lt1); for(i_=lt1; i_<=rt1;i_++) { y[i_] = y[i_] + v*s.vals[i_+i1_]; } } v = s.vals[ri+d]*x[i]; if( u>0 ) { lt = ri1-u; rt = ri1-1; lt1 = i-u; rt1 = i-1; i1_ = (lt1)-(lt); vv = 0.0; for(i_=lt; i_<=rt;i_++) { vv += s.vals[i_]*x[i_+i1_]; } v = v+vv; } y[i] = v; } return; } } /************************************************************************* This function simultaneously calculates two matrix-vector products: S*x and S^T*x. S must be square (non-rectangular) matrix stored in CRS or SKS format (exception will be thrown otherwise). INPUT PARAMETERS S - sparse N*N matrix in CRS or SKS format. X - array[N], input vector. For performance reasons we make only quick checks - we check that array size is at least N, but we do not check for NAN's or INF's. Y0 - output buffer, possibly preallocated. In case buffer size is too small to store result, this buffer is automatically resized. Y1 - output buffer, possibly preallocated. In case buffer size is too small to store result, this buffer is automatically resized. 
OUTPUT PARAMETERS Y0 - array[N], S*x Y1 - array[N], S^T*x NOTE: this function throws exception when called for non-CRS/SKS matrix. You must convert your matrix with SparseConvertToCRS/SKS() before using this function. -- ALGLIB PROJECT -- Copyright 14.10.2011 by Bochkanov Sergey *************************************************************************/ public static void sparsemv2(sparsematrix s, double[] x, ref double[] y0, ref double[] y1) { int l = 0; double tval = 0; int i = 0; int j = 0; double vx = 0; double vs = 0; double v = 0; double vv = 0; double vd0 = 0; double vd1 = 0; int vi = 0; int j0 = 0; int j1 = 0; int n = 0; int ri = 0; int ri1 = 0; int d = 0; int u = 0; int lt = 0; int rt = 0; int lt1 = 0; int rt1 = 0; int i_ = 0; int i1_ = 0; alglib.ap.assert(s.matrixtype==1 || s.matrixtype==2, "SparseMV2: incorrect matrix type (convert your matrix to CRS/SKS)"); alglib.ap.assert(s.m==s.n, "SparseMV2: matrix is non-square"); l = alglib.ap.len(x); alglib.ap.assert(l>=s.n, "SparseMV2: Length(X)0 ) { lt = ri; rt = ri+d-1; lt1 = i-d; rt1 = i-1; v = x[i]; i1_ = (lt) - (lt1); for(i_=lt1; i_<=rt1;i_++) { y1[i_] = y1[i_] + v*s.vals[i_+i1_]; } i1_ = (lt1)-(lt); vv = 0.0; for(i_=lt; i_<=rt;i_++) { vv += s.vals[i_]*x[i_+i1_]; } vd0 = vd0+vv; } if( u>0 ) { lt = ri1-u; rt = ri1-1; lt1 = i-u; rt1 = i-1; v = x[i]; i1_ = (lt) - (lt1); for(i_=lt1; i_<=rt1;i_++) { y0[i_] = y0[i_] + v*s.vals[i_+i1_]; } i1_ = (lt1)-(lt); vv = 0.0; for(i_=lt; i_<=rt;i_++) { vv += s.vals[i_]*x[i_+i1_]; } vd1 = vd1+vv; } y0[i] = vd0; y1[i] = vd1; } return; } } /************************************************************************* This function calculates matrix-vector product S*x, when S is symmetric matrix. Matrix S must be stored in CRS or SKS format (exception will be thrown otherwise). INPUT PARAMETERS S - sparse M*M matrix in CRS or SKS format. 
IsUpper - whether upper or lower triangle of S is given: * if upper triangle is given, only S[i,j] for j>=i are used, and lower triangle is ignored (it can be empty - these elements are not referenced at all). * if lower triangle is given, only S[i,j] for j<=i are used, and upper triangle is ignored. X - array[N], input vector. For performance reasons we make only quick checks - we check that array size is at least N, but we do not check for NAN's or INF's. Y - output buffer, possibly preallocated. In case buffer size is too small to store result, this buffer is automatically resized. OUTPUT PARAMETERS Y - array[M], S*x NOTE: this function throws exception when called for non-CRS/SKS matrix. You must convert your matrix with SparseConvertToCRS/SKS() before using this function. -- ALGLIB PROJECT -- Copyright 14.10.2011 by Bochkanov Sergey *************************************************************************/ public static void sparsesmv(sparsematrix s, bool isupper, double[] x, ref double[] y) { int n = 0; int i = 0; int j = 0; int id = 0; int lt = 0; int rt = 0; double v = 0; double vv = 0; double vy = 0; double vx = 0; double vd = 0; int ri = 0; int ri1 = 0; int d = 0; int u = 0; int lt1 = 0; int rt1 = 0; int i_ = 0; int i1_ = 0; alglib.ap.assert(s.matrixtype==1 || s.matrixtype==2, "SparseSMV: incorrect matrix type (convert your matrix to CRS/SKS)"); alglib.ap.assert(alglib.ap.len(x)>=s.n, "SparseSMV: length(X)0 && !isupper ) { lt = ri; rt = ri+d-1; lt1 = i-d; rt1 = i-1; v = x[i]; i1_ = (lt) - (lt1); for(i_=lt1; i_<=rt1;i_++) { y[i_] = y[i_] + v*s.vals[i_+i1_]; } i1_ = (lt1)-(lt); vv = 0.0; for(i_=lt; i_<=rt;i_++) { vv += s.vals[i_]*x[i_+i1_]; } vd = vd+vv; } if( u>0 && isupper ) { lt = ri1-u; rt = ri1-1; lt1 = i-u; rt1 = i-1; v = x[i]; i1_ = (lt) - (lt1); for(i_=lt1; i_<=rt1;i_++) { y[i_] = y[i_] + v*s.vals[i_+i1_]; } i1_ = (lt1)-(lt); vv = 0.0; for(i_=lt; i_<=rt;i_++) { vv += s.vals[i_]*x[i_+i1_]; } vd = vd+vv; } y[i] = vd; } return; } } 
/************************************************************************* This function calculates vector-matrix-vector product x'*S*x, where S is symmetric matrix. Matrix S must be stored in CRS or SKS format (exception will be thrown otherwise). INPUT PARAMETERS S - sparse M*M matrix in CRS or SKS format. IsUpper - whether upper or lower triangle of S is given: * if upper triangle is given, only S[i,j] for j>=i are used, and lower triangle is ignored (it can be empty - these elements are not referenced at all). * if lower triangle is given, only S[i,j] for j<=i are used, and upper triangle is ignored. X - array[N], input vector. For performance reasons we make only quick checks - we check that array size is at least N, but we do not check for NAN's or INF's. RESULT x'*S*x NOTE: this function throws exception when called for non-CRS/SKS matrix. You must convert your matrix with SparseConvertToCRS/SKS() before using this function. -- ALGLIB PROJECT -- Copyright 27.01.2014 by Bochkanov Sergey *************************************************************************/ public static double sparsevsmv(sparsematrix s, bool isupper, double[] x) { double result = 0; int n = 0; int i = 0; int j = 0; int k = 0; int id = 0; int lt = 0; int rt = 0; double v = 0; double v0 = 0; double v1 = 0; int ri = 0; int ri1 = 0; int d = 0; int u = 0; int lt1 = 0; alglib.ap.assert(s.matrixtype==1 || s.matrixtype==2, "SparseVSMV: incorrect matrix type (convert your matrix to CRS/SKS)"); alglib.ap.assert(alglib.ap.len(x)>=s.n, "SparseVSMV: length(X)0 && !isupper ) { lt = ri; rt = ri+d-1; lt1 = i-d; k = d-1; v0 = x[i]; v = 0.0; for(j=0; j<=k; j++) { v = v+x[lt1+j]*s.vals[lt+j]; } result = result+2*v0*v; } if( u>0 && isupper ) { lt = ri1-u; rt = ri1-1; lt1 = i-u; k = u-1; v0 = x[i]; v = 0.0; for(j=0; j<=k; j++) { v = v+x[lt1+j]*s.vals[lt+j]; } result = result+2*v0*v; } } return result; } return result; } /************************************************************************* This function 
calculates matrix-matrix product S*A. Matrix S must be stored in CRS or SKS format (exception will be thrown otherwise). INPUT PARAMETERS S - sparse M*N matrix in CRS or SKS format. A - array[N][K], input dense matrix. For performance reasons we make only quick checks - we check that array size is at least N, but we do not check for NAN's or INF's. K - number of columns of matrix (A). B - output buffer, possibly preallocated. In case buffer size is too small to store result, this buffer is automatically resized. OUTPUT PARAMETERS B - array[M][K], S*A NOTE: this function throws exception when called for non-CRS/SKS matrix. You must convert your matrix with SparseConvertToCRS/SKS() before using this function. -- ALGLIB PROJECT -- Copyright 14.10.2011 by Bochkanov Sergey *************************************************************************/ public static void sparsemm(sparsematrix s, double[,] a, int k, ref double[,] b) { double tval = 0; double v = 0; int id = 0; int i = 0; int j = 0; int k0 = 0; int k1 = 0; int lt = 0; int rt = 0; int m = 0; int n = 0; int ri = 0; int ri1 = 0; int lt1 = 0; int rt1 = 0; int d = 0; int u = 0; double vd = 0; int i_ = 0; alglib.ap.assert(s.matrixtype==1 || s.matrixtype==2, "SparseMM: incorrect matrix type (convert your matrix to CRS/SKS)"); alglib.ap.assert(alglib.ap.rows(a)>=s.n, "SparseMM: Rows(A)0, "SparseMM: K<=0"); m = s.m; n = s.n; k1 = k-1; apserv.rmatrixsetlengthatleast(ref b, m, k); for(i=0; i<=m-1; i++) { for(j=0; j<=k-1; j++) { b[i,j] = 0; } } if( s.matrixtype==1 ) { // // CRS format // alglib.ap.assert(s.ninitialized==s.ridx[m], "SparseMM: some rows/elements of the CRS matrix were not initialized (you must initialize everything you promised to SparseCreateCRS)"); if( k0 ) { lt = ri; rt = ri+d-1; lt1 = i-d; rt1 = i-1; for(j=lt1; j<=rt1; j++) { v = s.vals[lt+(j-lt1)]; if( k0 ) { lt = ri1-u; rt = ri1-1; lt1 = i-u; rt1 = i-1; for(j=lt1; j<=rt1; j++) { v = s.vals[lt+(j-lt1)]; if( k=s.m, "SparseMTM: Rows(A)0, "SparseMTM: 
K<=0"); m = s.m; n = s.n; k1 = k-1; apserv.rmatrixsetlengthatleast(ref b, n, k); for(i=0; i<=n-1; i++) { for(j=0; j<=k-1; j++) { b[i,j] = 0; } } if( s.matrixtype==1 ) { // // CRS format // alglib.ap.assert(s.ninitialized==s.ridx[m], "SparseMTM: some rows/elements of the CRS matrix were not initialized (you must initialize everything you promised to SparseCreateCRS)"); if( k0 ) { lt = ri; rt = ri+d-1; lt1 = i-d; rt1 = i-1; for(j=lt1; j<=rt1; j++) { v = s.vals[lt+(j-lt1)]; if( k0 ) { lt = ri1-u; rt = ri1-1; lt1 = i-u; rt1 = i-1; for(j=lt1; j<=rt1; j++) { v = s.vals[lt+(j-lt1)]; if( k=s.n, "SparseMM2: Rows(A)0, "SparseMM2: K<=0"); n = s.n; k1 = k-1; apserv.rmatrixsetlengthatleast(ref b0, n, k); apserv.rmatrixsetlengthatleast(ref b1, n, k); for(i=0; i<=n-1; i++) { for(j=0; j<=k-1; j++) { b1[i,j] = 0; b0[i,j] = 0; } } if( s.matrixtype==1 ) { // // CRS format // alglib.ap.assert(s.ninitialized==s.ridx[s.m], "SparseMM2: some rows/elements of the CRS matrix were not initialized (you must initialize everything you promised to SparseCreateCRS)"); if( k0 ) { lt = ri; rt = ri+d-1; lt1 = i-d; rt1 = i-1; for(j=lt1; j<=rt1; j++) { v = s.vals[lt+(j-lt1)]; if( k0 ) { lt = ri1-u; rt = ri1-1; lt1 = i-u; rt1 = i-1; for(j=lt1; j<=rt1; j++) { v = s.vals[lt+(j-lt1)]; if( k=i are used, and lower triangle is ignored (it can be empty - these elements are not referenced at all). * if lower triangle is given, only S[i,j] for j<=i are used, and upper triangle is ignored. A - array[N][K], input dense matrix. For performance reasons we make only quick checks - we check that array size is at least N, but we do not check for NAN's or INF's. K - number of columns of matrix (A). B - output buffer, possibly preallocated. In case buffer size is too small to store result, this buffer is automatically resized. OUTPUT PARAMETERS B - array[M][K], S*A NOTE: this function throws exception when called for non-CRS/SKS matrix. 
You must convert your matrix with SparseConvertToCRS/SKS() before using this function. -- ALGLIB PROJECT -- Copyright 14.10.2011 by Bochkanov Sergey *************************************************************************/ public static void sparsesmm(sparsematrix s, bool isupper, double[,] a, int k, ref double[,] b) { int i = 0; int j = 0; int k0 = 0; int id = 0; int k1 = 0; int lt = 0; int rt = 0; double v = 0; double vb = 0; double va = 0; int n = 0; int ri = 0; int ri1 = 0; int lt1 = 0; int rt1 = 0; int d = 0; int u = 0; int i_ = 0; alglib.ap.assert(s.matrixtype==1 || s.matrixtype==2, "SparseSMM: incorrect matrix type (convert your matrix to CRS/SKS)"); alglib.ap.assert(alglib.ap.rows(a)>=s.n, "SparseSMM: Rows(X)linalgswitch ) { for(i=0; i<=n-1; i++) { for(j=0; j<=k-1; j++) { if( s.didx[i]!=s.uidx[i] ) { id = s.didx[i]; b[i,j] = b[i,j]+s.vals[id]*a[s.idx[id],j]; } if( isupper ) { lt = s.uidx[i]; rt = s.ridx[i+1]; vb = 0; va = a[i,j]; for(k0=lt; k0<=rt-1; k0++) { id = s.idx[k0]; v = s.vals[k0]; vb = vb+a[id,j]*v; b[id,j] = b[id,j]+va*v; } b[i,j] = b[i,j]+vb; } else { lt = s.ridx[i]; rt = s.didx[i]; vb = 0; va = a[i,j]; for(k0=lt; k0<=rt-1; k0++) { id = s.idx[k0]; v = s.vals[k0]; vb = vb+a[id,j]*v; b[id,j] = b[id,j]+va*v; } b[i,j] = b[i,j]+vb; } } } } else { for(i=0; i<=n-1; i++) { if( s.didx[i]!=s.uidx[i] ) { id = s.didx[i]; v = s.vals[id]; for(i_=0; i_<=k-1;i_++) { b[i,i_] = b[i,i_] + v*a[s.idx[id],i_]; } } if( isupper ) { lt = s.uidx[i]; rt = s.ridx[i+1]; for(j=lt; j<=rt-1; j++) { id = s.idx[j]; v = s.vals[j]; for(i_=0; i_<=k-1;i_++) { b[i,i_] = b[i,i_] + v*a[id,i_]; } for(i_=0; i_<=k-1;i_++) { b[id,i_] = b[id,i_] + v*a[i,i_]; } } } else { lt = s.ridx[i]; rt = s.didx[i]; for(j=lt; j<=rt-1; j++) { id = s.idx[j]; v = s.vals[j]; for(i_=0; i_<=k-1;i_++) { b[i,i_] = b[i,i_] + v*a[id,i_]; } for(i_=0; i_<=k-1;i_++) { b[id,i_] = b[id,i_] + v*a[i,i_]; } } } } } return; } if( s.matrixtype==2 ) { // // SKS format // alglib.ap.assert(s.m==s.n, "SparseMM2: non-square SKS 
matrices are not supported"); for(i=0; i<=n-1; i++) { ri = s.ridx[i]; ri1 = s.ridx[i+1]; d = s.didx[i]; u = s.uidx[i]; if( d>0 && !isupper ) { lt = ri; rt = ri+d-1; lt1 = i-d; rt1 = i-1; for(j=lt1; j<=rt1; j++) { v = s.vals[lt+(j-lt1)]; if( k0 && isupper ) { lt = ri1-u; rt = ri1-1; lt1 = i-u; rt1 = i-1; for(j=lt1; j<=rt1; j++) { v = s.vals[lt+(j-lt1)]; if( k=i are used, and lower triangle is ignored (it can be empty - these elements are not referenced at all). * if lower triangle is given, only S[i,j] for j<=i are used, and upper triangle is ignored. IsUnit - unit or non-unit diagonal: * if True, diagonal elements of triangular matrix are considered equal to 1.0. Actual elements stored in S are not referenced at all. * if False, diagonal stored in S is used OpType - operation type: * if 0, S*x is calculated * if 1, (S^T)*x is calculated (transposition) X - array[N] which stores input vector. For performance reasons we make only quick checks - we check that array size is at least N, but we do not check for NAN's or INF's. Y - possibly preallocated input buffer. Automatically resized if its size is too small. OUTPUT PARAMETERS Y - array[N], op(S)*x NOTE: this function throws exception when called for non-CRS/SKS matrix. You must convert your matrix with SparseConvertToCRS/SKS() before using this function. 
-- ALGLIB PROJECT -- Copyright 20.01.2014 by Bochkanov Sergey *************************************************************************/ public static void sparsetrmv(sparsematrix s, bool isupper, bool isunit, int optype, double[] x, ref double[] y) { int n = 0; int i = 0; int j = 0; int k = 0; int j0 = 0; int j1 = 0; double v = 0; int ri = 0; int ri1 = 0; int d = 0; int u = 0; int lt = 0; int rt = 0; int lt1 = 0; int rt1 = 0; int i_ = 0; int i1_ = 0; alglib.ap.assert(s.matrixtype==1 || s.matrixtype==2, "SparseTRMV: incorrect matrix type (convert your matrix to CRS/SKS)"); alglib.ap.assert(optype==0 || optype==1, "SparseTRMV: incorrect operation type (must be 0 or 1)"); alglib.ap.assert(alglib.ap.len(x)>=s.n, "SparseTRMV: Length(X)0 && !isupper ) { lt = ri; rt = ri+d-1; lt1 = i-d; rt1 = i-1; if( optype==0 ) { i1_ = (lt1)-(lt); v = 0.0; for(i_=lt; i_<=rt;i_++) { v += s.vals[i_]*x[i_+i1_]; } y[i] = y[i]+v; } else { v = x[i]; i1_ = (lt) - (lt1); for(i_=lt1; i_<=rt1;i_++) { y[i_] = y[i_] + v*s.vals[i_+i1_]; } } } if( u>0 && isupper ) { lt = ri1-u; rt = ri1-1; lt1 = i-u; rt1 = i-1; if( optype==0 ) { v = x[i]; i1_ = (lt) - (lt1); for(i_=lt1; i_<=rt1;i_++) { y[i_] = y[i_] + v*s.vals[i_+i1_]; } } else { i1_ = (lt1)-(lt); v = 0.0; for(i_=lt; i_<=rt;i_++) { v += s.vals[i_]*x[i_+i1_]; } y[i] = y[i]+v; } } } return; } } /************************************************************************* This function solves linear system op(S)*y=x where x is vector, S is symmetric triangular matrix, op(S) is transposition or no operation. Matrix S must be stored in CRS or SKS format (exception will be thrown otherwise). INPUT PARAMETERS S - sparse square matrix in CRS or SKS format. IsUpper - whether upper or lower triangle of S is used: * if upper triangle is given, only S[i,j] for j>=i are used, and lower triangle is ignored (it can be empty - these elements are not referenced at all). * if lower triangle is given, only S[i,j] for j<=i are used, and upper triangle is ignored. 
IsUnit - unit or non-unit diagonal: * if True, diagonal elements of triangular matrix are considered equal to 1.0. Actual elements stored in S are not referenced at all. * if False, diagonal stored in S is used. It is your responsibility to make sure that diagonal is non-zero. OpType - operation type: * if 0, S*x is calculated * if 1, (S^T)*x is calculated (transposition) X - array[N] which stores input vector. For performance reasons we make only quick checks - we check that array size is at least N, but we do not check for NAN's or INF's. OUTPUT PARAMETERS X - array[N], inv(op(S))*x NOTE: this function throws exception when called for non-CRS/SKS matrix. You must convert your matrix with SparseConvertToCRS/SKS() before using this function. NOTE: no assertion or tests are done during algorithm operation. It is your responsibility to provide invertible matrix to algorithm. -- ALGLIB PROJECT -- Copyright 20.01.2014 by Bochkanov Sergey *************************************************************************/ public static void sparsetrsv(sparsematrix s, bool isupper, bool isunit, int optype, double[] x) { int n = 0; int fst = 0; int lst = 0; int stp = 0; int i = 0; int j = 0; int k = 0; double v = 0; double vd = 0; int j0 = 0; int j1 = 0; int ri = 0; int ri1 = 0; int d = 0; int u = 0; int lt = 0; int lt1 = 0; alglib.ap.assert(s.matrixtype==1 || s.matrixtype==2, "SparseTRSV: incorrect matrix type (convert your matrix to CRS/SKS)"); alglib.ap.assert(optype==0 || optype==1, "SparseTRSV: incorrect operation type (must be 0 or 1)"); alglib.ap.assert(alglib.ap.len(x)>=s.n, "SparseTRSV: Length(X)0 && i<=lst) || (stp<0 && i>=lst) ) { // // Select range of indexes to process // if( isupper ) { j0 = s.uidx[i]; j1 = s.ridx[i+1]-1; } else { j0 = s.ridx[i]; j1 = s.didx[i]-1; } // // Calculate X[I] // v = 0.0; for(j=j0; j<=j1; j++) { v = v+s.vals[j]*x[s.idx[j]]; } if( !isunit ) { if( s.didx[i]==s.uidx[i] ) { vd = 0; } else { vd = s.vals[s.didx[i]]; } } else { vd = 1.0; } k = 
apserv.saferdiv(x[i]-v, vd, ref v); alglib.ap.assert(k<=0, "SparseTRSV: near-overflow or division by exact zero"); x[i] = v; // // Next I // i = i+stp; } return; } if( optype==1 ) { // // Transposition. // // (S^T)*x=y with upper or lower triangular S. // if( isupper ) { fst = 0; lst = n-1; stp = 1; } else { fst = n-1; lst = 0; stp = -1; } i = fst; while( (stp>0 && i<=lst) || (stp<0 && i>=lst) ) { // // X[i] already stores A[i,i]*Y[i], the only thing left // is to divide by diagonal element. // if( !isunit ) { if( s.didx[i]==s.uidx[i] ) { vd = 0; } else { vd = s.vals[s.didx[i]]; } } else { vd = 1.0; } k = apserv.saferdiv(x[i], vd, ref v); alglib.ap.assert(k<=0, "SparseTRSV: near-overflow or division by exact zero"); x[i] = v; // // For upper triangular case: // subtract X[i]*Ai from X[i+1:N-1] // // For lower triangular case: // subtract X[i]*Ai from X[0:i-1] // // (here Ai is I-th row of original, untransposed A). // if( isupper ) { j0 = s.uidx[i]; j1 = s.ridx[i+1]-1; } else { j0 = s.ridx[i]; j1 = s.didx[i]-1; } v = x[i]; for(j=j0; j<=j1; j++) { k = s.idx[j]; x[k] = x[k]-s.vals[j]*v; } // // Next I // i = i+stp; } return; } alglib.ap.assert(false, "SparseTRSV: internal error"); } if( s.matrixtype==2 ) { // // SKS format // alglib.ap.assert(s.m==s.n, "SparseTRSV: non-square SKS matrices are not supported"); if( (optype==0 && !isupper) || (optype==1 && isupper) ) { // // Lower triangular op(S) (matrix itself can be upper triangular). 
// for(i=0; i<=n-1; i++) { // // Select range of indexes to process // ri = s.ridx[i]; ri1 = s.ridx[i+1]; d = s.didx[i]; u = s.uidx[i]; if( isupper ) { lt = i-u; lt1 = ri1-u; k = u-1; } else { lt = i-d; lt1 = ri; k = d-1; } // // Calculate X[I] // v = 0.0; for(j=0; j<=k; j++) { v = v+s.vals[lt1+j]*x[lt+j]; } if( isunit ) { vd = 1; } else { vd = s.vals[ri+d]; } k = apserv.saferdiv(x[i]-v, vd, ref v); alglib.ap.assert(k<=0, "SparseTRSV: near-overflow or division by exact zero"); x[i] = v; } return; } if( (optype==1 && !isupper) || (optype==0 && isupper) ) { // // Upper triangular op(S) (matrix itself can be lower triangular). // for(i=n-1; i>=0; i--) { ri = s.ridx[i]; ri1 = s.ridx[i+1]; d = s.didx[i]; u = s.uidx[i]; // // X[i] already stores A[i,i]*Y[i], the only thing left // is to divide by diagonal element. // if( isunit ) { vd = 1; } else { vd = s.vals[ri+d]; } k = apserv.saferdiv(x[i], vd, ref v); alglib.ap.assert(k<=0, "SparseTRSV: near-overflow or division by exact zero"); x[i] = v; // // Subtract product of X[i] and I-th column of "effective" A from // unprocessed variables. // v = x[i]; if( isupper ) { lt = i-u; lt1 = ri1-u; k = u-1; } else { lt = i-d; lt1 = ri; k = d-1; } for(j=0; j<=k; j++) { x[lt+j] = x[lt+j]-v*s.vals[lt1+j]; } } return; } alglib.ap.assert(false, "SparseTRSV: internal error"); } alglib.ap.assert(false, "SparseTRSV: internal error"); } /************************************************************************* This procedure resizes Hash-Table matrix. It can be called when you have deleted too many elements from the matrix, and you want to free unneeded memory. 
-- ALGLIB PROJECT --
             Copyright 14.10.2011 by Bochkanov Sergey
        *************************************************************************/
        public static void sparseresizematrix(sparsematrix s)
        {
            alglib.ap.assert(s.matrixtype==0, "SparseResizeMatrix: incorrect matrix type");

            //
            // Count the occupied slots of the current table. A slot is treated
            // as occupied when the row index stored at the even position of
            // Idx is non-negative.
            //
            int oldsize = s.tablesize;
            int used = 0;
            for(int slot=0; slot<oldsize; slot++)
            {
                if( s.idx[2*slot]>=0 )
                {
                    used++;
                }
            }

            //
            // Derive the new capacity from the number of occupied slots and
            // record the amount of free space in the new table.
            //
            s.tablesize = (int)Math.Round(used/desiredloadfactor*growfactor+additional);
            s.nfree = s.tablesize-used;

            //
            // Install fresh storage in the matrix; after the swaps the old
            // arrays are held by the local variables.
            //
            double[] oldvals = new double[s.tablesize];
            int[] oldidx = new int[2*s.tablesize];
            alglib.ap.swap(ref s.vals, ref oldvals);
            alglib.ap.swap(ref s.idx, ref oldidx);

            //
            // Mark every slot of the new table as empty (-1 in the row index)
            //
            for(int slot=0; slot<s.tablesize; slot++)
            {
                s.idx[2*slot] = -1;
            }

            //
            // Re-insert every occupied entry of the old table
            //
            for(int slot=0; slot<oldsize; slot++)
            {
                if( oldidx[2*slot]>=0 )
                {
                    sparseset(s, oldidx[2*slot], oldidx[2*slot+1], oldvals[slot]);
                }
            }
        }


        /*************************************************************************
        This function returns average length of chain at hash-table.

INPUT PARAMETERS
            S           -   sparse matrix.

        RESULT
            For every table slot whose row index differs from -1, the number
            of probes needed to walk from the hashed position of its (row,column)
            pair to a slot holding the same pair is counted; the result is the
            total number of probes divided by the number of such slots.
            Zero is returned when there are no such slots, or when the matrix
            is not stored as a Hash-Table.

          -- ALGLIB PROJECT --
             Copyright 14.10.2011 by Bochkanov Sergey
        *************************************************************************/
        public static double sparsegetaveragelengthofchain(sparsematrix s)
        {
            //
            // Only Hash-Table storage (matrix type 0) has chains;
            // every other storage type reports zero.
            //
            if( s.matrixtype!=0 )
            {
                return 0;
            }

            int size = s.tablesize;
            int chains = 0;
            int probes = 0;
            for(int slot=0; slot<size; slot++)
            {
                int entry = 2*slot;
                if( s.idx[entry]==-1 )
                {
                    continue;
                }
                chains++;

                //
                // Start at the hashed position and advance cyclically until
                // the slot holding the same (row,column) pair is reached.
                // The first comparison counts as one probe.
                //
                int pos = hash(s.idx[entry], s.idx[entry+1], size);
                probes++;
                while( !(s.idx[entry]==s.idx[2*pos] && s.idx[entry+1]==s.idx[2*pos+1]) )
                {
                    pos = (pos+1)%size;
                    probes++;
                }
            }

            double result = chains==0 ? 0 : (double)probes/(double)chains;
            return result;
        }


        /*************************************************************************
        This  function  is  used  to enumerate all elements of the sparse matrix.
        Before  first  call  user  initializes  T0 and T1 counters by zero. These
        counters are used to remember current position in a  matrix;  after  each
        call they are updated by the function.

        Subsequent calls to this function return non-zero elements of the  sparse
        matrix, one by one. If you enumerate CRS matrix, matrix is traversed from
        left to right, from top to bottom. In case you enumerate matrix stored as
        Hash table, elements are returned in random order.

        EXAMPLE
            > T0=0
            > T1=0
            > while SparseEnumerate(S,T0,T1,I,J,V) do
            >     ....do something with I,J,V

        INPUT PARAMETERS
            S           -   sparse M*N matrix in Hash-Table or CRS representation.
T0 - internal counter T1 - internal counter OUTPUT PARAMETERS T0 - new value of the internal counter T1 - new value of the internal counter I - row index of non-zero element, 0<=I=s.ninitialized ) { t0 = 0; t1 = 0; result = false; return result; } while( t0>s.ridx[t1+1]-1 && t1=s.ridx[s.m] ) { t0 = 0; t1 = 0; result = false; return result; } while( t0>s.ridx[t1+1]-1 && t1=S.M)"); alglib.ap.assert(0<=j && j=S.N)"); alglib.ap.assert(math.isfinite(v), "SparseRewriteExisting: invalid argument V(either V is infinite or V is NaN)"); result = false; // // Hash-table matrix // if( s.matrixtype==0 ) { k = s.tablesize; hashcode = hash(i, j, k); while( true ) { if( s.idx[2*hashcode]==-1 ) { return result; } if( s.idx[2*hashcode]==i && s.idx[2*hashcode+1]==j ) { s.vals[hashcode] = v; result = true; return result; } hashcode = (hashcode+1)%k; } } // // CRS matrix // if( s.matrixtype==1 ) { alglib.ap.assert(s.ninitialized==s.ridx[s.m], "SparseRewriteExisting: some rows/elements of the CRS matrix were not initialized (you must initialize everything you promised to SparseCreateCRS)"); k0 = s.ridx[i]; k1 = s.ridx[i+1]-1; while( k0<=k1 ) { k = (k0+k1)/2; if( s.idx[k]==j ) { s.vals[k] = v; result = true; return result; } if( s.idx[k]=0 && i=M"); // // Prepare output buffer // apserv.rvectorsetlengthatleast(ref irow, s.n); for(i0=0; i0<=s.n-1; i0++) { irow[i0] = 0; } // // Output // if( s.matrixtype==1 ) { for(i0=s.ridx[i]; i0<=s.ridx[i+1]-1; i0++) { irow[s.idx[i0]] = s.vals[i0]; } return; } if( s.matrixtype==2 ) { // // Copy subdiagonal and diagonal parts // alglib.ap.assert(s.n==s.m, "SparseGetRow: non-square SKS matrices are not supported"); j0 = i-s.didx[i]; i0 = -j0+s.ridx[i]; for(j=j0; j<=i; j++) { irow[j] = s.vals[j+i0]; } // // Copy superdiagonal part // upperprofile = s.uidx[s.n]; j0 = i+1; j1 = Math.Min(s.n-1, i+upperprofile); for(j=j0; j<=j1; j++) { if( j-i<=s.uidx[j] ) { irow[j] = s.vals[s.ridx[j+1]-(j-i)]; } } return; } } 
/************************************************************************* This function returns I-th row of the sparse matrix IN COMPRESSED FORMAT - only non-zero elements are returned (with their indexes). Matrix must be stored in CRS or SKS format. INPUT PARAMETERS: S - sparse M*N matrix in CRS format I - row index, 0<=I=0 && i=M"); // // Initialize NZCnt // nzcnt = 0; // // CRS matrix - just copy data // if( s.matrixtype==1 ) { nzcnt = s.ridx[i+1]-s.ridx[i]; apserv.ivectorsetlengthatleast(ref colidx, nzcnt); apserv.rvectorsetlengthatleast(ref vals, nzcnt); k0 = s.ridx[i]; for(k=0; k<=nzcnt-1; k++) { colidx[k] = s.idx[k0+k]; vals[k] = s.vals[k0+k]; } return; } // // SKS matrix - a bit more complex sequence // if( s.matrixtype==2 ) { alglib.ap.assert(s.n==s.m, "SparseGetCompressedRow: non-square SKS matrices are not supported"); // // Allocate enough place for storage // upperprofile = s.uidx[s.n]; apserv.ivectorsetlengthatleast(ref colidx, s.didx[i]+1+upperprofile); apserv.rvectorsetlengthatleast(ref vals, s.didx[i]+1+upperprofile); // // Copy subdiagonal and diagonal parts // j0 = i-s.didx[i]; i0 = -j0+s.ridx[i]; for(j=j0; j<=i; j++) { colidx[nzcnt] = j; vals[nzcnt] = s.vals[j+i0]; nzcnt = nzcnt+1; } // // Copy superdiagonal part // j0 = i+1; j1 = Math.Min(s.n-1, i+upperprofile); for(j=j0; j<=j1; j++) { if( j-i<=s.uidx[j] ) { colidx[nzcnt] = j; vals[nzcnt] = s.vals[s.ridx[j+1]-(j-i)]; nzcnt = nzcnt+1; } } return; } } /************************************************************************* This function performs efficient in-place transpose of SKS matrix. No additional memory is allocated during transposition. This function supports only skyline storage format (SKS). INPUT PARAMETERS S - sparse matrix in SKS format. OUTPUT PARAMETERS S - sparse matrix, transposed. 
-- ALGLIB PROJECT --
             Copyright 16.01.2014 by Bochkanov Sergey
        *************************************************************************/
        public static void sparsetransposesks(sparsematrix s)
        {
            int n = 0;
            int d = 0;
            int u = 0;
            int i = 0;
            int k = 0;
            int t0 = 0;
            int t1 = 0;
            double v = 0;

            alglib.ap.assert(s.matrixtype==2, "SparseTransposeSKS: only SKS matrices are supported");
            alglib.ap.assert(s.m==s.n, "SparseTransposeSKS: non-square SKS matrices are not supported");
            n = s.n;

            //
            // Each block I occupies Vals[RIdx[I]..RIdx[I+1]-1] and is laid out
            // as D subdiagonal values, the diagonal value, then U superdiagonal
            // values. Transposition exchanges the two skyline heights and
            // rearranges the block accordingly. The loop starts from I=1,
            // so block 0 is never touched.
            //
            for(i=1; i<=n-1; i++)
            {
                d = s.didx[i];
                u = s.uidx[i];
                k = s.uidx[i];
                s.uidx[i] = s.didx[i];
                s.didx[i] = k;
                if( d==u )
                {
                    //
                    // Upper skyline height equal to lower skyline height,
                    // simple exchange is needed for transposition
                    //
                    t0 = s.ridx[i];
                    for(k=0; k<=d-1; k++)
                    {
                        v = s.vals[t0+k];
                        s.vals[t0+k] = s.vals[t0+d+1+k];
                        s.vals[t0+d+1+k] = v;
                    }
                }
                if( d>u )
                {
                    //
                    // Upper skyline height is less than lower skyline height.
                    //
                    // Transposition becomes a bit tricky: we have to rearrange
                    // "L0 L1 D U" to "U D L0 L1", where |L0|=|U|=u, |L1|=d-u.
                    //
                    // In order to do this we perform a sequence of swaps and
                    // in-place reversals:
                    // * swap(L0,U)         =>  "U   L1  D   L0"
                    // * reverse("L1 D L0") =>  "U   L0~ D   L1~" (where X~ is a reverse of X)
                    // * reverse("L0~ D")   =>  "U   D   L0  L1~"
                    // * reverse("L1")      =>  "U   D   L0  L1"
                    //
                    t0 = s.ridx[i];
                    t1 = s.ridx[i]+d+1;
                    for(k=0; k<=u-1; k++)
                    {
                        v = s.vals[t0+k];
                        s.vals[t0+k] = s.vals[t1+k];
                        s.vals[t1+k] = v;
                    }
                    t0 = s.ridx[i]+u;
                    t1 = s.ridx[i+1]-1;
                    while( t1>t0 )
                    {
                        v = s.vals[t0];
                        s.vals[t0] = s.vals[t1];
                        s.vals[t1] = v;
                        t0 = t0+1;
                        t1 = t1-1;
                    }
                    t0 = s.ridx[i]+u;
                    t1 = s.ridx[i]+u+u;
                    while( t1>t0 )
                    {
                        v = s.vals[t0];
                        s.vals[t0] = s.vals[t1];
                        s.vals[t1] = v;
                        t0 = t0+1;
                        t1 = t1-1;
                    }
                    t0 = s.ridx[i+1]-(d-u);
                    t1 = s.ridx[i+1]-1;
                    while( t1>t0 )
                    {
                        v = s.vals[t0];
                        s.vals[t0] = s.vals[t1];
                        s.vals[t1] = v;
                        t0 = t0+1;
                        t1 = t1-1;
                    }
                }
                if( d<u )
                {
                    //
                    // Upper skyline height is greater than lower skyline height.
                    //
                    // Mirror image of the previous case: we have to rearrange
                    // "L D U0 U1" to "U0 U1 D L", where |L|=|U1|=d, |U0|=u-d.
                    //
                    // In order to do this we perform a sequence of swaps and
                    // in-place reversals:
                    // * swap(L,U1)          =>  "U1  D   U0  L"
                    // * reverse("U1 D U0")  =>  "U0~ D   U1~ L" (where X~ is a reverse of X)
                    // * reverse("U0~")      =>  "U0  D   U1~ L"
                    // * reverse("D U1~")    =>  "U0  U1  D   L"
                    //
                    t0 = s.ridx[i];
                    t1 = s.ridx[i+1]-d;
                    for(k=0; k<=d-1; k++)
                    {
                        v = s.vals[t0+k];
                        s.vals[t0+k] = s.vals[t1+k];
                        s.vals[t1+k] = v;
                    }
                    t0 = s.ridx[i];
                    t1 = s.ridx[i]+u;
                    while( t1>t0 )
                    {
                        v = s.vals[t0];
                        s.vals[t0] = s.vals[t1];
                        s.vals[t1] = v;
                        t0 = t0+1;
                        t1 = t1-1;
                    }
                    t0 = s.ridx[i];
                    t1 = s.ridx[i]+u-d-1;
                    while( t1>t0 )
                    {
                        v = s.vals[t0];
                        s.vals[t0] = s.vals[t1];
                        s.vals[t1] = v;
                        t0 = t0+1;
                        t1 = t1-1;
                    }
                    t0 = s.ridx[i]+u-d;
                    t1 = s.ridx[i+1]-d-1;
                    while( t1>t0 )
                    {
                        v = s.vals[t0];
                        s.vals[t0] = s.vals[t1];
                        s.vals[t1] = v;
                        t0 = t0+1;
                        t1 = t1-1;
                    }
                }
            }

            //
            // Exchange the trailing N-th entries of UIdx and DIdx as well
            //
            k = s.uidx[n];
            s.uidx[n] = s.didx[n];
            s.didx[n] = k;
        }


        /*************************************************************************
        This function performs in-place conversion to desired sparse storage
        format.

        INPUT PARAMETERS
            S0      -   sparse matrix in any format.
            Fmt     -   desired storage format  of  the  output,  as  returned  by
                        SparseGetMatrixType() function:
                        * 0 for hash-based storage
                        * 1 for CRS
                        * 2 for SKS

        OUTPUT PARAMETERS
            S0          -   sparse matrix in requested format.

NOTE: in-place conversion wastes a lot of memory which is  used  to  store
              temporaries.  If  you  perform  a  lot  of  repeated conversions, we
              recommend to use out-of-place buffered  conversion  functions,  like
              SparseCopyToBuf(), which can reuse already allocated memory.

          -- ALGLIB PROJECT --
             Copyright 16.01.2014 by Bochkanov Sergey
        *************************************************************************/
        public static void sparseconvertto(sparsematrix s0, int fmt)
        {
            alglib.ap.assert((fmt==0 || fmt==1) || fmt==2, "SparseConvertTo: invalid fmt parameter");

            //
            // Dispatch to the in-place converter for the requested format
            //
            switch( fmt )
            {
                case 0:
                    sparseconverttohash(s0);
                    return;
                case 1:
                    sparseconverttocrs(s0);
                    return;
                case 2:
                    sparseconverttosks(s0);
                    return;
            }
            alglib.ap.assert(false, "SparseConvertTo: invalid matrix type");
        }


        /*************************************************************************
        This  function  performs out-of-place conversion to desired sparse storage
        format. S0 is copied to S1 and converted on-the-fly. Memory  allocated  in
        S1 is reused to maximum extent possible.

        INPUT PARAMETERS
            S0      -   sparse matrix in any format.
            Fmt     -   desired storage format  of  the  output,  as  returned  by
                        SparseGetMatrixType() function:
                        * 0 for hash-based storage
                        * 1 for CRS
                        * 2 for SKS

        OUTPUT PARAMETERS
            S1          -   sparse matrix in requested format.

          -- ALGLIB PROJECT --
             Copyright 16.01.2014 by Bochkanov Sergey
        *************************************************************************/
        public static void sparsecopytobuf(sparsematrix s0, int fmt, sparsematrix s1)
        {
            alglib.ap.assert((fmt==0 || fmt==1) || fmt==2, "SparseCopyToBuf: invalid fmt parameter");

            //
            // Dispatch to the buffered out-of-place converter for the requested format
            //
            switch( fmt )
            {
                case 0:
                    sparsecopytohashbuf(s0, s1);
                    return;
                case 1:
                    sparsecopytocrsbuf(s0, s1);
                    return;
                case 2:
                    sparsecopytosksbuf(s0, s1);
                    return;
            }
            alglib.ap.assert(false, "SparseCopyToBuf: invalid matrix type");
        }


        /*************************************************************************
        This function performs in-place conversion to Hash table storage.

INPUT PARAMETERS
            S           -   sparse matrix in Hash table, CRS or SKS format.

        OUTPUT PARAMETERS
            S           -   sparse matrix in Hash table format.

        NOTE: this  function  has   no  effect  when  called with matrix which  is
              already in Hash table mode.

        NOTE: in-place conversion involves allocation of temporary arrays. If  you
              perform a lot of repeated in-place conversions, it may lead to memory
              fragmentation. Consider using out-of-place SparseCopyToHashBuf()
              function in this case.

          -- ALGLIB PROJECT --
             Copyright 20.07.2012 by Bochkanov Sergey
        *************************************************************************/
        public static void sparseconverttohash(sparsematrix s)
        {
            alglib.ap.assert((s.matrixtype==0 || s.matrixtype==1) || s.matrixtype==2, "SparseConvertToHash: invalid matrix type");

            int rows = s.m;
            int cols = s.n;

            if( s.matrixtype==0 )
            {
                //
                // Already in Hash mode
                //
                return;
            }
            if( s.matrixtype==1 )
            {
                //
                // From CRS to Hash: move the CRS arrays out of the structure,
                // re-create it as a hash table and insert the elements row by row.
                //
                s.matrixtype = 0;
                int[] colidx = new int[0];
                int[] rowoffs = new int[0];
                double[] values = new double[0];
                alglib.ap.swap(ref s.idx, ref colidx);
                alglib.ap.swap(ref s.ridx, ref rowoffs);
                alglib.ap.swap(ref s.vals, ref values);
                sparsecreatebuf(rows, cols, rowoffs[rows], s);
                for(int i=0; i<rows; i++)
                {
                    for(int j=rowoffs[i]; j<rowoffs[i+1]; j++)
                    {
                        sparseset(s, i, colidx[j], values[j]);
                    }
                }
                return;
            }
            if( s.matrixtype==2 )
            {
                //
                // From SKS to Hash: move the SKS arrays out of the structure,
                // re-create it as a hash table and insert the elements block by block.
                //
                s.matrixtype = 0;
                int[] blockoffs = new int[0];
                int[] lower = new int[0];
                int[] upper = new int[0];
                double[] values = new double[0];
                alglib.ap.swap(ref s.ridx, ref blockoffs);
                alglib.ap.swap(ref s.didx, ref lower);
                alglib.ap.swap(ref s.uidx, ref upper);
                alglib.ap.swap(ref s.vals, ref values);
                sparsecreatebuf(rows, cols, blockoffs[rows], s);
                for(int i=0; i<rows; i++)
                {
                    //
                    // Subdiagonal and diagonal parts of I-th block:
                    // row I, columns I-lower[I]..I
                    //
                    int below = lower[i];
                    int start = blockoffs[i];
                    for(int j=0; j<=below; j++)
                    {
                        sparseset(s, i, i-below+j, values[start+j]);
                    }

                    //
                    // Superdiagonal part of I-th block:
                    // column I, rows I-upper[I]..I-1
                    //
                    int above = upper[i];
                    start = blockoffs[i]+below+1;
                    for(int j=0; j<above; j++)
                    {
                        sparseset(s, i-above+j, i, values[start+j]);
                    }
                }
                return;
            }
            alglib.ap.assert(false, "SparseConvertToHash: invalid matrix type");
        }


        /*************************************************************************
        This  function  performs  out-of-place  conversion  to  Hash table storage
        format. S0 is copied to S1 and converted on-the-fly.

        INPUT PARAMETERS
            S0          -   sparse matrix in any format.

        OUTPUT PARAMETERS
            S1          -   sparse matrix in Hash table format.

        NOTE: if S0 is stored as Hash-table, it is just copied without conversion.

        NOTE: this function de-allocates memory  occupied  by  S1 before  starting
              conversion. If you perform a  lot  of  repeated  conversions, it may
              lead to memory fragmentation. In this case we recommend you  to  use
              SparseCopyToHashBuf() function which re-uses memory in S1 as much as
              possible.

          -- ALGLIB PROJECT --
             Copyright 20.07.2012 by Bochkanov Sergey
        *************************************************************************/
        public static void sparsecopytohash(sparsematrix s0, sparsematrix s1)
        {
            bool knowntype = (s0.matrixtype==0 || s0.matrixtype==1) || s0.matrixtype==2;
            alglib.ap.assert(knowntype, "SparseCopyToHash: invalid matrix type");

            //
            // The actual work is done by the buffered version
            //
            sparsecopytohashbuf(s0, s1);
        }


        /*************************************************************************
        This  function  performs  out-of-place  conversion  to  Hash table storage
        format. S0 is copied to S1 and converted on-the-fly. Memory  allocated  in
        S1 is reused to maximum extent possible.

        INPUT PARAMETERS
            S0          -   sparse matrix in any format.

        OUTPUT PARAMETERS
            S1          -   sparse matrix in Hash table format.

        NOTE: if S0 is stored as Hash-table, it is just copied without conversion.

-- ALGLIB PROJECT --
             Copyright 20.07.2012 by Bochkanov Sergey
        *************************************************************************/
        public static void sparsecopytohashbuf(sparsematrix s0, sparsematrix s1)
        {
            alglib.ap.assert((s0.matrixtype==0 || s0.matrixtype==1) || s0.matrixtype==2, "SparseCopyToHashBuf: invalid matrix type");

            if( s0.matrixtype==0 )
            {
                //
                // Already hash, just copy
                //
                sparsecopybuf(s0, s1);
                return;
            }
            if( s0.matrixtype==1 || s0.matrixtype==2 )
            {
                //
                // CRS or SKS storage: both are handled the same way - S1 is
                // re-created as a hash table and the elements enumerated from
                // S0 are inserted one by one.
                //
                int t0 = 0;
                int t1 = 0;
                int row = 0;
                int col = 0;
                double val = 0;
                sparsecreatebuf(s0.m, s0.n, s0.ridx[s0.m], s1);
                while( sparseenumerate(s0, ref t0, ref t1, ref row, ref col, ref val) )
                {
                    sparseset(s1, row, col, val);
                }
                return;
            }
            alglib.ap.assert(false, "SparseCopyToHashBuf: invalid matrix type");
        }


        /*************************************************************************
        This function converts matrix to CRS format.

        Some  algorithms  (linear  algebra ones, for example) require matrices in
        CRS format. This function allows to perform in-place conversion.

        INPUT PARAMETERS
            S           -   sparse M*N matrix in any format

        OUTPUT PARAMETERS
            S           -   matrix in CRS format

        NOTE: this   function  has  no  effect  when  called with matrix which is
              already in CRS mode.

        NOTE: this function allocates temporary memory to store a   copy  of  the
              matrix. If you perform a lot of repeated conversions, we  recommend
              you  to  use  SparseCopyToCRSBuf()  function,   which   can   reuse
              previously allocated memory.

-- ALGLIB PROJECT --
             Copyright 14.10.2011 by Bochkanov Sergey
        *************************************************************************/
        public static void sparseconverttocrs(sparsematrix s)
        {
            int m = 0;
            int i = 0;
            int j = 0;
            double[] tvals = new double[0];
            int[] tidx = new int[0];
            int[] temp = new int[0];
            int[] tridx = new int[0];
            int nonne = 0;
            int k = 0;
            int offs0 = 0;
            int offs1 = 0;

            m = s.m;
            if( s.matrixtype==0 )
            {

                //
                // From Hash-table to CRS.
                // First, create local copy of the hash table.
                //
                // The swaps move the hash storage into TVals/TIdx and leave
                // S.Vals/S.Idx holding the (empty) local arrays.
                //
                s.matrixtype = 1;
                k = s.tablesize;
                alglib.ap.swap(ref s.vals, ref tvals);
                alglib.ap.swap(ref s.idx, ref tidx);

                //
                // Fill RIdx by number of elements per row:
                // RIdx[I+1] stores number of elements in I-th row.
                //
                // Convert RIdx from row sizes to row offsets.
                // Set NInitialized
                //
                nonne = 0;
                apserv.ivectorsetlengthatleast(ref s.ridx, s.m+1);
                for(i=0; i<=s.m; i++)
                {
                    s.ridx[i] = 0;
                }
                for(i=0; i<=k-1; i++)
                {
                    // Slots with a negative row index are skipped.
                    if( tidx[2*i]>=0 )
                    {
                        s.ridx[tidx[2*i]+1] = s.ridx[tidx[2*i]+1]+1;
                        nonne = nonne+1;
                    }
                }
                for(i=0; i<=s.m-1; i++)
                {
                    s.ridx[i+1] = s.ridx[i+1]+s.ridx[i];
                }
                s.ninitialized = s.ridx[s.m];

                //
                // Allocate memory and move elements to Vals/Idx.
                // Initially, elements are sorted by rows, but unsorted within row.
                // After initial insertion we sort elements within row.
                //
                // Temp[I] counts how many elements of row I were already placed.
                //
                temp = new int[s.m];
                for(i=0; i<=s.m-1; i++)
                {
                    temp[i] = 0;
                }
                apserv.rvectorsetlengthatleast(ref s.vals, nonne);
                apserv.ivectorsetlengthatleast(ref s.idx, nonne);
                for(i=0; i<=k-1; i++)
                {
                    if( tidx[2*i]>=0 )
                    {
                        s.vals[s.ridx[tidx[2*i]]+temp[tidx[2*i]]] = tvals[i];
                        s.idx[s.ridx[tidx[2*i]]+temp[tidx[2*i]]] = tidx[2*i+1];
                        temp[tidx[2*i]] = temp[tidx[2*i]]+1;
                    }
                }
                for(i=0; i<=s.m-1; i++)
                {
                    tsort.tagsortmiddleir(ref s.idx, ref s.vals, s.ridx[i], s.ridx[i+1]-s.ridx[i]);
                }

                //
                // Initialization 'S.UIdx' and 'S.DIdx'
                //
                sparseinitduidx(s);
                return;
            }
            if( s.matrixtype==1 )
            {

                //
                // Already CRS
                //
                return;
            }
            if( s.matrixtype==2 )
            {
                alglib.ap.assert(s.m==s.n, "SparseConvertToCRS: non-square SKS matrices are not supported");

                //
                // From SKS to CRS.
                //
                // First, create local copy of the SKS matrix (Vals,
                // Idx, RIdx are stored; DIdx/UIdx for some time are
                // left in the SparseMatrix structure).
                //
                s.matrixtype = 1;
                alglib.ap.swap(ref s.vals, ref tvals);
                alglib.ap.swap(ref s.idx, ref tidx);
                alglib.ap.swap(ref s.ridx, ref tridx);

                //
                // Fill RIdx by number of elements per row:
                // RIdx[I+1] stores number of elements in I-th row.
                //
                // Convert RIdx from row sizes to row offsets.
                // Set NInitialized
                //
                // Every row starts at 1 (its diagonal element); DIdx[I]
                // subdiagonal elements are added to row I, and each of the
                // UIdx[I] superdiagonal elements of column I adds one element
                // to a row above I.
                //
                apserv.ivectorsetlengthatleast(ref s.ridx, m+1);
                s.ridx[0] = 0;
                for(i=1; i<=m; i++)
                {
                    s.ridx[i] = 1;
                }
                nonne = 0;
                for(i=0; i<=m-1; i++)
                {
                    s.ridx[i+1] = s.didx[i]+s.ridx[i+1];
                    for(j=i-s.uidx[i]; j<=i-1; j++)
                    {
                        s.ridx[j+1] = s.ridx[j+1]+1;
                    }
                    nonne = nonne+s.didx[i]+1+s.uidx[i];
                }
                for(i=0; i<=s.m-1; i++)
                {
                    s.ridx[i+1] = s.ridx[i+1]+s.ridx[i];
                }
                s.ninitialized = s.ridx[s.m];

                //
                // Allocate memory and move elements to Vals/Idx.
                // Initially, elements are sorted by rows, and are sorted within row too.
                // No additional post-sorting is required.
                //
                temp = new int[m];
                for(i=0; i<=m-1; i++)
                {
                    temp[i] = 0;
                }
                apserv.rvectorsetlengthatleast(ref s.vals, nonne);
                apserv.ivectorsetlengthatleast(ref s.idx, nonne);
                for(i=0; i<=m-1; i++)
                {

                    //
                    // copy subdiagonal and diagonal parts of I-th block
                    //
                    offs0 = tridx[i];
                    offs1 = s.ridx[i]+temp[i];
                    k = s.didx[i]+1;
                    for(j=0; j<=k-1; j++)
                    {
                        s.vals[offs1+j] = tvals[offs0+j];
                        s.idx[offs1+j] = i-s.didx[i]+j;
                    }
                    temp[i] = temp[i]+s.didx[i]+1;

                    //
                    // Copy superdiagonal part of I-th block
                    //
                    // Element J of the superdiagonal part belongs to row
                    // I-K+J and column I of the CRS matrix.
                    //
                    offs0 = tridx[i]+s.didx[i]+1;
                    k = s.uidx[i];
                    for(j=0; j<=k-1; j++)
                    {
                        offs1 = s.ridx[i-k+j]+temp[i-k+j];
                        s.vals[offs1] = tvals[offs0+j];
                        s.idx[offs1] = i;
                        temp[i-k+j] = temp[i-k+j]+1;
                    }
                }

                //
                // Initialization 'S.UIdx' and 'S.DIdx'
                //
                sparseinitduidx(s);
                return;
            }
            alglib.ap.assert(false, "SparseConvertToCRS: invalid matrix type");
        }


        /*************************************************************************
        This function performs out-of-place conversion to CRS format. S0 is
        copied to S1 and converted on-the-fly.

        INPUT PARAMETERS
            S0          -   sparse matrix in any format.

        OUTPUT PARAMETERS
            S1          -   sparse matrix in CRS format.

        NOTE: if S0 is stored as CRS, it is just copied without conversion.

        NOTE: this implementation validates the matrix type and then forwards
              the call to SparseCopyToCRSBuf(); it performs no separate
              de-allocation of S1 before the conversion.

          -- ALGLIB PROJECT --
             Copyright 20.07.2012 by Bochkanov Sergey
        *************************************************************************/
        public static void sparsecopytocrs(sparsematrix s0, sparsematrix s1)
        {
            alglib.ap.assert((s0.matrixtype==0 || s0.matrixtype==1) || s0.matrixtype==2, "SparseCopyToCRS: invalid matrix type");
            sparsecopytocrsbuf(s0, s1);
        }


        /*************************************************************************
        This function performs out-of-place conversion to CRS format. S0 is
        copied to S1 and converted on-the-fly.
Memory allocated in S1 is reused to maximum extent possible.

        INPUT PARAMETERS
            S0          -   sparse matrix in any format.
            S1          -   matrix which may contain some pre-allocated memory, or
                            can be just uninitialized structure.

        OUTPUT PARAMETERS
            S1          -   sparse matrix in CRS format.

        NOTE: if S0 is stored as CRS, it is just copied without conversion.

        NOTE: an SKS matrix S0 must be square, otherwise an assertion fails.

          -- ALGLIB PROJECT --
             Copyright 20.07.2012 by Bochkanov Sergey
        *************************************************************************/
        public static void sparsecopytocrsbuf(sparsematrix s0, sparsematrix s1)
        {
            int[] temp = new int[0];
            int nonne = 0;
            int i = 0;
            int j = 0;
            int k = 0;
            int offs0 = 0;
            int offs1 = 0;
            int m = 0;

            alglib.ap.assert((s0.matrixtype==0 || s0.matrixtype==1) || s0.matrixtype==2, "SparseCopyToCRSBuf: invalid matrix type");
            m = s0.m;
            if( s0.matrixtype==0 )
            {

                //
                // Convert from hash-table to CRS
                // Done like ConvertToCRS function
                //
                s1.matrixtype = 1;
                s1.m = s0.m;
                s1.n = s0.n;
                s1.nfree = s0.nfree;
                nonne = 0;
                k = s0.tablesize;
                apserv.ivectorsetlengthatleast(ref s1.ridx, s1.m+1);
                for(i=0; i<=s1.m; i++)
                {
                    s1.ridx[i] = 0;
                }

                // Temp[I] counts how many elements of row I were already placed.
                temp = new int[s1.m];
                for(i=0; i<=s1.m-1; i++)
                {
                    temp[i] = 0;
                }

                //
                // Number of elements per row
                // (slots with a negative row index are skipped)
                //
                for(i=0; i<=k-1; i++)
                {
                    if( s0.idx[2*i]>=0 )
                    {
                        s1.ridx[s0.idx[2*i]+1] = s1.ridx[s0.idx[2*i]+1]+1;
                        nonne = nonne+1;
                    }
                }

                //
                // Fill RIdx (offsets of rows)
                //
                for(i=0; i<=s1.m-1; i++)
                {
                    s1.ridx[i+1] = s1.ridx[i+1]+s1.ridx[i];
                }

                //
                // Allocate memory
                //
                apserv.rvectorsetlengthatleast(ref s1.vals, nonne);
                apserv.ivectorsetlengthatleast(ref s1.idx, nonne);
                for(i=0; i<=k-1; i++)
                {
                    if( s0.idx[2*i]>=0 )
                    {
                        s1.vals[s1.ridx[s0.idx[2*i]]+temp[s0.idx[2*i]]] = s0.vals[i];
                        s1.idx[s1.ridx[s0.idx[2*i]]+temp[s0.idx[2*i]]] = s0.idx[2*i+1];
                        temp[s0.idx[2*i]] = temp[s0.idx[2*i]]+1;
                    }
                }

                //
                // Set NInitialized
                //
                s1.ninitialized = s1.ridx[s1.m];

                //
                // Sorting of elements
                //
                for(i=0; i<=s1.m-1; i++)
                {
                    tsort.tagsortmiddleir(ref s1.idx, ref s1.vals, s1.ridx[i], s1.ridx[i+1]-s1.ridx[i]);
                }

                //
                // Initialization 'S.UIdx' and 'S.DIdx'
                //
                sparseinitduidx(s1);
                return;
            }
            if( s0.matrixtype==1 )
            {

                //
                // Already CRS, just copy
                //
                sparsecopybuf(s0, s1);
                return;
            }
            if( s0.matrixtype==2 )
            {
                // NOTE(review): the message prefix reads "SparseCopyToCRS" although
                // the assertion is raised from SparseCopyToCRSBuf; text left as is.
                alglib.ap.assert(s0.m==s0.n, "SparseCopyToCRS: non-square SKS matrices are not supported");

                //
                // From SKS to CRS.
                //
                s1.m = s0.m;
                s1.n = s0.n;
                s1.matrixtype = 1;

                //
                // Fill RIdx by number of elements per row:
                // RIdx[I+1] stores number of elements in I-th row.
                //
                // Convert RIdx from row sizes to row offsets.
                // Set NInitialized
                //
                // Every row starts at 1 (its diagonal element); DIdx[I]
                // subdiagonal elements are added to row I, and each of the
                // UIdx[I] superdiagonal elements of column I adds one element
                // to a row above I.
                //
                apserv.ivectorsetlengthatleast(ref s1.ridx, m+1);
                s1.ridx[0] = 0;
                for(i=1; i<=m; i++)
                {
                    s1.ridx[i] = 1;
                }
                nonne = 0;
                for(i=0; i<=m-1; i++)
                {
                    s1.ridx[i+1] = s0.didx[i]+s1.ridx[i+1];
                    for(j=i-s0.uidx[i]; j<=i-1; j++)
                    {
                        s1.ridx[j+1] = s1.ridx[j+1]+1;
                    }
                    nonne = nonne+s0.didx[i]+1+s0.uidx[i];
                }
                for(i=0; i<=m-1; i++)
                {
                    s1.ridx[i+1] = s1.ridx[i+1]+s1.ridx[i];
                }
                s1.ninitialized = s1.ridx[m];

                //
                // Allocate memory and move elements to Vals/Idx.
                // Initially, elements are sorted by rows, and are sorted within row too.
                // No additional post-sorting is required.
                //
                temp = new int[m];
                for(i=0; i<=m-1; i++)
                {
                    temp[i] = 0;
                }
                apserv.rvectorsetlengthatleast(ref s1.vals, nonne);
                apserv.ivectorsetlengthatleast(ref s1.idx, nonne);
                for(i=0; i<=m-1; i++)
                {

                    //
                    // copy subdiagonal and diagonal parts of I-th block
                    //
                    offs0 = s0.ridx[i];
                    offs1 = s1.ridx[i]+temp[i];
                    k = s0.didx[i]+1;
                    for(j=0; j<=k-1; j++)
                    {
                        s1.vals[offs1+j] = s0.vals[offs0+j];
                        s1.idx[offs1+j] = i-s0.didx[i]+j;
                    }
                    temp[i] = temp[i]+s0.didx[i]+1;

                    //
                    // Copy superdiagonal part of I-th block
                    //
                    // Element J of the superdiagonal part belongs to row
                    // I-K+J and column I of the CRS matrix.
                    //
                    offs0 = s0.ridx[i]+s0.didx[i]+1;
                    k = s0.uidx[i];
                    for(j=0; j<=k-1; j++)
                    {
                        offs1 = s1.ridx[i-k+j]+temp[i-k+j];
                        s1.vals[offs1] = s0.vals[offs0+j];
                        s1.idx[offs1] = i;
                        temp[i-k+j] = temp[i-k+j]+1;
                    }
                }

                //
                // Initialization 'S.UIdx' and 'S.DIdx'
                //
                sparseinitduidx(s1);
                return;
            }
            alglib.ap.assert(false, "SparseCopyToCRSBuf: unexpected matrix type");
        }


        /*************************************************************************
        This function performs in-place conversion to SKS format.

        INPUT PARAMETERS
            S           -   sparse matrix in any format.

        OUTPUT PARAMETERS
            S           -   sparse matrix in SKS format.

        NOTE: this function has no effect when called with matrix which is
              already in SKS mode.

        NOTE: in-place conversion involves allocation of temporary arrays. If you
              perform a lot of repeated in-place conversions, it may lead to
              memory fragmentation. Consider using out-of-place
              SparseCopyToSKSBuf() function in this case.

-- ALGLIB PROJECT -- Copyright 15.01.2014 by Bochkanov Sergey *************************************************************************/ public static void sparseconverttosks(sparsematrix s) { int[] tridx = new int[0]; int[] tdidx = new int[0]; int[] tuidx = new int[0]; double[] tvals = new double[0]; int n = 0; int t0 = 0; int t1 = 0; int i = 0; int j = 0; int k = 0; double v = 0; alglib.ap.assert((s.matrixtype==0 || s.matrixtype==1) || s.matrixtype==2, "SparseConvertToSKS: invalid matrix type"); alglib.ap.assert(s.m==s.n, "SparseConvertToSKS: rectangular matrices are not supported"); n = s.n; if( s.matrixtype==2 ) { // // Already in SKS mode // return; } // // Generate internal copy of SKS matrix // apserv.ivectorsetlengthatleast(ref tdidx, n+1); apserv.ivectorsetlengthatleast(ref tuidx, n+1); for(i=0; i<=n; i++) { tdidx[i] = 0; tuidx[i] = 0; } t0 = 0; t1 = 0; while( sparseenumerate(s, ref t0, ref t1, ref i, ref j, ref v) ) { if( j=0 && s.idx[2*i0+1]>i ) { result = result+1; } } return result; } if( s.matrixtype==1 ) { // // CRS matrix // alglib.ap.assert(s.ninitialized==s.ridx[s.m], "SparseGetUpperCount: some rows/elements of the CRS matrix were not initialized (you must initialize everything you promised to SparseCreateCRS)"); result = 0; sz = s.m; for(i=0; i<=sz-1; i++) { result = result+(s.ridx[i+1]-s.uidx[i]); } return result; } if( s.matrixtype==2 ) { // // SKS matrix // alglib.ap.assert(s.m==s.n, "SparseGetUpperCount: non-square SKS matrices are not supported"); result = 0; sz = s.m; for(i=0; i<=sz-1; i++) { result = result+s.uidx[i]; } return result; } alglib.ap.assert(false, "SparseGetUpperCount: internal error"); return result; } /************************************************************************* The function returns number of strictly lower triangular non-zero elements in the matrix. It counts SYMBOLICALLY non-zero elements, i.e. entries in the sparse matrix data structure. If some element has zero numerical value, it is still counted. 
This function has different cost for different types of matrices: * for hash-based matrices it involves complete pass over entire hash-table with O(NNZ) cost, where NNZ is number of non-zero elements * for CRS and SKS matrix types cost of counting is O(N) (N - matrix size). RESULT: number of non-zero elements strictly below main diagonal -- ALGLIB PROJECT -- Copyright 12.02.2014 by Bochkanov Sergey *************************************************************************/ public static int sparsegetlowercount(sparsematrix s) { int result = 0; int sz = 0; int i0 = 0; int i = 0; result = -1; if( s.matrixtype==0 ) { // // Hash-table matrix // result = 0; sz = s.tablesize; for(i0=0; i0<=sz-1; i0++) { i = s.idx[2*i0]; if( i>=0 && s.idx[2*i0+1]>N * worst case - N>>M, small M, large N, matrix does not fit in CPU cache COMMERCIAL EDITION OF ALGLIB: ! Commercial version of ALGLIB includes two important improvements of ! this function, which can be used from C++ and C#: ! * Intel MKL support (lightweight Intel MKL is shipped with ALGLIB) ! * multicore support ! ! Intel MKL gives approximately constant (with respect to number of ! worker threads) acceleration factor which depends on CPU being used, ! problem size and "baseline" ALGLIB edition which is used for ! comparison. ! ! Say, on SSE2-capable CPU with N=1024, HPC ALGLIB will be: ! * about 2-3x faster than ALGLIB for C++ without MKL ! * about 7-10x faster than "pure C#" edition of ALGLIB ! Difference in performance will be more striking on newer CPU's with ! support for newer SIMD instructions. Generally, MKL accelerates any ! problem whose size is at least 128, with best efficiency achieved for ! N's larger than 512. ! ! Commercial edition of ALGLIB also supports multithreaded acceleration ! of this function. We should note that LU decomposition is harder to ! parallelize than, say, matrix-matrix product - this algorithm has ! many internal synchronization points which can not be avoided. However ! 
parallelism starts to be profitable starting from N=1024, achieving
      ! near-linear speedup for N=4096 or higher.
      !
      ! In order to use multicore features you have to:
      ! * use commercial version of ALGLIB
      ! * call  this  function  with  "smp_"  prefix,  which  indicates  that
      !   multicore code will be used (for multicore support)
      !
      ! We recommend you to read 'Working with commercial version' section  of
      ! ALGLIB Reference Manual in order to find out how to  use  performance-
      ! related features provided by commercial edition of ALGLIB.

    INPUT PARAMETERS:
        A       -   array[0..M-1, 0..N-1].
        M       -   number of rows in matrix A.
        N       -   number of columns in matrix A.

    OUTPUT PARAMETERS:
        A       -   matrices L and U in compact form:
                    * L is stored under main diagonal
                    * U is stored on and above main diagonal
        Pivots  -   permutation matrix in compact form.
                    array[0..Min(M-1,N-1)].

    NOTE: Pivots is reset to an empty array before the sizes are checked;
          an assertion fails when M<=0 or N<=0.

      -- ALGLIB routine --
         10.01.2010
         Bochkanov Sergey
    *************************************************************************/
    public static void rmatrixlu(ref double[,] a, int m, int n, ref int[] pivots)
    {
        // Output array is cleared first, exactly as before validation.
        pivots = new int[0];

        // Both dimensions must be strictly positive.
        alglib.ap.assert(0<m, "RMatrixLU: incorrect M!");
        alglib.ap.assert(0<n, "RMatrixLU: incorrect N!");

        // The factorization itself is delegated to the PLU routine.
        rmatrixplu(ref a, m, n, ref pivots);
    }


    /*************************************************************************
    Single-threaded stub: forwards the call to RMatrixLU().
    HPC ALGLIB replaces it by multithreaded code.
    *************************************************************************/
    public static void _pexec_rmatrixlu(ref double[,] a, int m, int n, ref int[] pivots)
    {
        rmatrixlu(ref a, m, n, ref pivots);
    }


    /*************************************************************************
    LU decomposition of a general complex matrix with row pivoting

    A is represented as A = P*L*U, where:
    * L is lower unitriangular matrix
    * U is upper triangular matrix
    * P = P0*P1*...*PK, K=min(M,N)-1,
      Pi - permutation matrix for I and Pivots[I]

    This is cache-oblivious implementation of LU decomposition. It is optimized
    for square matrices.
As for rectangular matrices: * best case - M>>N * worst case - N>>M, small M, large N, matrix does not fit in CPU cache COMMERCIAL EDITION OF ALGLIB: ! Commercial version of ALGLIB includes two important improvements of ! this function, which can be used from C++ and C#: ! * Intel MKL support (lightweight Intel MKL is shipped with ALGLIB) ! * multicore support ! ! Intel MKL gives approximately constant (with respect to number of ! worker threads) acceleration factor which depends on CPU being used, ! problem size and "baseline" ALGLIB edition which is used for ! comparison. ! ! Say, on SSE2-capable CPU with N=1024, HPC ALGLIB will be: ! * about 2-3x faster than ALGLIB for C++ without MKL ! * about 7-10x faster than "pure C#" edition of ALGLIB ! Difference in performance will be more striking on newer CPU's with ! support for newer SIMD instructions. Generally, MKL accelerates any ! problem whose size is at least 128, with best efficiency achieved for ! N's larger than 512. ! ! Commercial edition of ALGLIB also supports multithreaded acceleration ! of this function. We should note that LU decomposition is harder to ! parallelize than, say, matrix-matrix product - this algorithm has ! many internal synchronization points which can not be avoided. However ! parallelism starts to be profitable starting from N=1024, achieving ! near-linear speedup for N=4096 or higher. ! ! In order to use multicore features you have to: ! * use commercial version of ALGLIB ! * call this function with "smp_" prefix, which indicates that ! multicore code will be used (for multicore support) ! ! We recommend you to read 'Working with commercial version' section of ! ALGLIB Reference Manual in order to find out how to use performance- ! related features provided by commercial edition of ALGLIB. INPUT PARAMETERS: A - array[0..M-1, 0..N-1]. M - number of rows in matrix A. N - number of columns in matrix A. 
OUTPUT PARAMETERS:
        A       -   matrices L and U in compact form:
                    * L is stored under main diagonal
                    * U is stored on and above main diagonal
        Pivots  -   permutation matrix in compact form.
                    array[0..Min(M-1,N-1)].

    NOTE: Pivots is reset to an empty array before the sizes are checked;
          an assertion fails when M<=0 or N<=0.

      -- ALGLIB routine --
         10.01.2010
         Bochkanov Sergey
    *************************************************************************/
    public static void cmatrixlu(ref complex[,] a, int m, int n, ref int[] pivots)
    {
        // Output array is cleared first, exactly as before validation.
        pivots = new int[0];

        // Both dimensions must be strictly positive.
        alglib.ap.assert(0<m, "CMatrixLU: incorrect M!");
        alglib.ap.assert(0<n, "CMatrixLU: incorrect N!");

        // The factorization itself is delegated to the PLU routine.
        cmatrixplu(ref a, m, n, ref pivots);
    }


    /*************************************************************************
    Single-threaded stub: forwards the call to CMatrixLU().
    HPC ALGLIB replaces it by multithreaded code.
    *************************************************************************/
    public static void _pexec_cmatrixlu(ref complex[,] a, int m, int n, ref int[] pivots)
    {
        cmatrixlu(ref a, m, n, ref pivots);
    }


    /*************************************************************************
    Cache-oblivious Cholesky decomposition

    The algorithm computes Cholesky decomposition  of  a  Hermitian  positive-
    definite matrix. The result of an algorithm is a representation  of  A  as
    A=U'*U  or A=L*L' (here X' denotes conj(X^T)).

    COMMERCIAL EDITION OF ALGLIB:

      ! Commercial version of ALGLIB includes two  important  improvements  of
      ! this function, which can be used from C++ and C#:
      ! * Intel MKL support (lightweight Intel MKL is shipped with ALGLIB)
      ! * multicore support
      !
      ! Intel MKL gives approximately constant  (with  respect  to  number  of
      ! worker threads) acceleration factor which depends on CPU  being  used,
      ! problem  size  and  "baseline"  ALGLIB  edition  which  is  used   for
      ! comparison.
      !
      ! Say, on SSE2-capable CPU with N=1024, HPC ALGLIB will be:
      ! * about 2-3x faster than ALGLIB for C++ without MKL
      ! * about 7-10x faster than "pure C#" edition of ALGLIB
      ! Difference in performance will be more striking  on  newer  CPU's with
      ! support for newer SIMD instructions. Generally,  MKL  accelerates  any
      !
problem whose size is at least 128, with best  efficiency achieved for
      ! N's larger than 512.
      !
      ! Commercial edition of ALGLIB also supports multithreaded  acceleration
      ! of this function. We should note that Cholesky decomposition is harder
      ! to parallelize than, say, matrix-matrix product - this  algorithm  has
      ! several synchronization points which  can  not  be  avoided.  However,
      ! parallelism starts to be profitable starting from N=500.
      !
      ! In order to use multicore features you have to:
      ! * use commercial version of ALGLIB
      ! * call  this  function  with  "smp_"  prefix,  which  indicates  that
      !   multicore code will be used (for multicore support)
      !
      ! We recommend you to read 'Working with commercial version' section  of
      ! ALGLIB Reference Manual in order to find out how to  use  performance-
      ! related features provided by commercial edition of ALGLIB.

    INPUT PARAMETERS:
        A       -   upper or lower triangle of a factorized matrix.
                    array with elements [0..N-1, 0..N-1].
        N       -   size of matrix A.
        IsUpper -   if IsUpper=True, then A contains an upper triangle of
                    a symmetric matrix, otherwise A contains a lower one.

    OUTPUT PARAMETERS:
        A       -   the result of factorization. If IsUpper=True, then
                    the upper triangle contains matrix U, so that A = U'*U,
                    and the elements below the main diagonal are not modified.
                    Similarly, if IsUpper = False.

    RESULT:
        If  the  matrix  is  positive-definite,  the  function  returns  True.
        Otherwise, the function returns False. Contents of A is not determined
        in such case.
        For N<1 the function returns False without calling the factorization.

      -- ALGLIB routine --
         15.12.2009
         Bochkanov Sergey
    *************************************************************************/
    public static bool hpdmatrixcholesky(ref complex[,] a, int n, bool isupper)
    {
        // Scratch buffer handed to the recursive routine.
        complex[] workbuf = new complex[0];

        // Degenerate size: report failure right away.
        if( n<1 )
        {
            return false;
        }

        // Factorize the whole matrix, starting at offset 0.
        return hpdmatrixcholeskyrec(ref a, 0, n, isupper, ref workbuf);
    }


    /*************************************************************************
    Single-threaded stub.
HPC ALGLIB replaces it by multithreaded code.
    This version simply forwards the call to HPDMatrixCholesky().
    *************************************************************************/
    public static bool _pexec_hpdmatrixcholesky(ref complex[,] a, int n, bool isupper)
    {
        bool result = hpdmatrixcholesky(ref a, n, isupper);
        return result;
    }


    /*************************************************************************
    Cache-oblivious Cholesky decomposition

    The algorithm computes Cholesky decomposition  of  a  symmetric  positive-
    definite matrix. The result of an algorithm is a representation  of  A  as
    A=U^T*U  or A=L*L^T

    COMMERCIAL EDITION OF ALGLIB:

      ! Commercial version of ALGLIB includes two  important  improvements  of
      ! this function, which can be used from C++ and C#:
      ! * Intel MKL support (lightweight Intel MKL is shipped with ALGLIB)
      ! * multicore support
      !
      ! Intel MKL gives approximately constant  (with  respect  to  number  of
      ! worker threads) acceleration factor which depends on CPU  being  used,
      ! problem  size  and  "baseline"  ALGLIB  edition  which  is  used   for
      ! comparison.
      !
      ! Say, on SSE2-capable CPU with N=1024, HPC ALGLIB will be:
      ! * about 2-3x faster than ALGLIB for C++ without MKL
      ! * about 7-10x faster than "pure C#" edition of ALGLIB
      ! Difference in performance will be more striking  on  newer  CPU's with
      ! support for newer SIMD instructions. Generally,  MKL  accelerates  any
      ! problem whose size is at least 128, with best  efficiency achieved for
      ! N's larger than 512.
      !
      ! Commercial edition of ALGLIB also supports multithreaded  acceleration
      ! of this function. We should note that Cholesky decomposition is harder
      ! to parallelize than, say, matrix-matrix product - this  algorithm  has
      ! several synchronization points which  can  not  be  avoided.  However,
      ! parallelism starts to be profitable starting from N=500.
      !
      ! In order to use multicore features you have to:
      ! * use commercial version of ALGLIB
      ! * call  this  function  with  "smp_"  prefix,  which  indicates  that
      !   multicore code will be used (for multicore support)
      !
      !
We recommend you to read 'Working with commercial version' section  of
      ! ALGLIB Reference Manual in order to find out how to  use  performance-
      ! related features provided by commercial edition of ALGLIB.

    INPUT PARAMETERS:
        A       -   upper or lower triangle of a factorized matrix.
                    array with elements [0..N-1, 0..N-1].
        N       -   size of matrix A.
        IsUpper -   if IsUpper=True, then A contains an upper triangle of
                    a symmetric matrix, otherwise A contains a lower one.

    OUTPUT PARAMETERS:
        A       -   the result of factorization. If IsUpper=True, then
                    the upper triangle contains matrix U, so that A = U^T*U,
                    and the elements below the main diagonal are not modified.
                    Similarly, if IsUpper = False.

    RESULT:
        If  the  matrix  is  positive-definite,  the  function  returns  True.
        Otherwise, the function returns False. Contents of A is not determined
        in such case.
        For N<1 the function returns False without calling the factorization.

      -- ALGLIB routine --
         15.12.2009
         Bochkanov Sergey
    *************************************************************************/
    public static bool spdmatrixcholesky(ref double[,] a, int n, bool isupper)
    {
        // Scratch buffer handed to the recursive routine.
        double[] workbuf = new double[0];

        // Degenerate size: report failure right away.
        if( n<1 )
        {
            return false;
        }

        // Factorize the whole matrix, starting at offset 0.
        return spdmatrixcholeskyrec(ref a, 0, n, isupper, ref workbuf);
    }


    /*************************************************************************
    Single-threaded stub: forwards the call to SPDMatrixCholesky().
    HPC ALGLIB replaces it by multithreaded code.
    *************************************************************************/
    public static bool _pexec_spdmatrixcholesky(ref double[,] a, int n, bool isupper)
    {
        bool result = spdmatrixcholesky(ref a, n, isupper);
        return result;
    }


    /*************************************************************************
    Update of Cholesky decomposition: rank-1 update to original A.  "Buffered"
    version which uses preallocated buffer which is saved  between  subsequent
    function calls.

    This function uses internally allocated buffer which is not saved  between
    subsequent  calls.
So, if you perform a lot of subsequent updates, we recommend you to use "buffered" version of this function: SPDMatrixCholeskyUpdateAdd1Buf(). INPUT PARAMETERS: A - upper or lower Cholesky factor. array with elements [0..N-1, 0..N-1]. Exception is thrown if array size is too small. N - size of matrix A, N>0 IsUpper - if IsUpper=True, then A contains upper Cholesky factor; otherwise A contains a lower one. U - array[N], rank-1 update to A: A_mod = A + u*u' Exception is thrown if array size is too small. BufR - possibly preallocated buffer; automatically resized if needed. It is recommended to reuse this buffer if you perform a lot of subsequent decompositions. OUTPUT PARAMETERS: A - updated factorization. If IsUpper=True, then the upper triangle contains matrix U, and the elements below the main diagonal are not modified. Similarly, if IsUpper = False. NOTE: this function always succeeds, so it does not return completion code NOTE: this function checks sizes of input arrays, but it does NOT checks for presence of infinities or NAN's. -- ALGLIB -- 03.02.2014 Sergey Bochkanov *************************************************************************/ public static void spdmatrixcholeskyupdateadd1(double[,] a, int n, bool isupper, double[] u) { double[] bufr = new double[0]; alglib.ap.assert(n>0, "SPDMatrixCholeskyUpdateAdd1: N<=0"); alglib.ap.assert(alglib.ap.rows(a)>=n, "SPDMatrixCholeskyUpdateAdd1: Rows(A)=n, "SPDMatrixCholeskyUpdateAdd1: Cols(A)=n, "SPDMatrixCholeskyUpdateAdd1: Length(U) ( Af20 0 Af22 Af23 ) ( A30 A31 A32 A33 ) ( Af30 0 Af32 Af33 ) If we have Cholesky decomposition of A, it must be recalculated after variables were fixed. However, it is possible to use efficient algorithm, which needs O(K*N^2) time to "fix" K variables, given Cholesky decomposition of original, "unfixed" A. INPUT PARAMETERS: A - upper or lower Cholesky factor. array with elements [0..N-1, 0..N-1]. Exception is thrown if array size is too small. 
N - size of matrix A, N>0 IsUpper - if IsUpper=True, then A contains upper Cholesky factor; otherwise A contains a lower one. Fix - array[N], I-th element is True if I-th variable must be fixed. Exception is thrown if array size is too small. BufR - possibly preallocated buffer; automatically resized if needed. It is recommended to reuse this buffer if you perform a lot of subsequent decompositions. OUTPUT PARAMETERS: A - updated factorization. If IsUpper=True, then the upper triangle contains matrix U, and the elements below the main diagonal are not modified. Similarly, if IsUpper = False. NOTE: this function always succeeds, so it does not return completion code NOTE: this function checks sizes of input arrays, but it does NOT checks for presence of infinities or NAN's. NOTE: this function is efficient only for moderate amount of updated variables - say, 0.1*N or 0.3*N. For larger amount of variables it will still work, but you may get better performance with straightforward Cholesky. -- ALGLIB -- 03.02.2014 Sergey Bochkanov *************************************************************************/ public static void spdmatrixcholeskyupdatefix(double[,] a, int n, bool isupper, bool[] fix) { double[] bufr = new double[0]; alglib.ap.assert(n>0, "SPDMatrixCholeskyUpdateFix: N<=0"); alglib.ap.assert(alglib.ap.rows(a)>=n, "SPDMatrixCholeskyUpdateFix: Rows(A)=n, "SPDMatrixCholeskyUpdateFix: Cols(A)=n, "SPDMatrixCholeskyUpdateFix: Length(Fix)0 IsUpper - if IsUpper=True, then A contains upper Cholesky factor; otherwise A contains a lower one. U - array[N], rank-1 update to A: A_mod = A + u*u' Exception is thrown if array size is too small. BufR - possibly preallocated buffer; automatically resized if needed. It is recommended to reuse this buffer if you perform a lot of subsequent decompositions. OUTPUT PARAMETERS: A - updated factorization. If IsUpper=True, then the upper triangle contains matrix U, and the elements below the main diagonal are not modified. 
Similarly, if IsUpper = False. -- ALGLIB -- 03.02.2014 Sergey Bochkanov *************************************************************************/ public static void spdmatrixcholeskyupdateadd1buf(double[,] a, int n, bool isupper, double[] u, ref double[] bufr) { int i = 0; int j = 0; int nz = 0; double cs = 0; double sn = 0; double v = 0; double vv = 0; alglib.ap.assert(n>0, "SPDMatrixCholeskyUpdateAdd1Buf: N<=0"); alglib.ap.assert(alglib.ap.rows(a)>=n, "SPDMatrixCholeskyUpdateAdd1Buf: Rows(A)=n, "SPDMatrixCholeskyUpdateAdd1Buf: Cols(A)=n, "SPDMatrixCholeskyUpdateAdd1Buf: Length(U)0 IsUpper - if IsUpper=True, then A contains upper Cholesky factor; otherwise A contains a lower one. Fix - array[N], I-th element is True if I-th variable must be fixed. Exception is thrown if array size is too small. BufR - possibly preallocated buffer; automatically resized if needed. It is recommended to reuse this buffer if you perform a lot of subsequent decompositions. OUTPUT PARAMETERS: A - updated factorization. If IsUpper=True, then the upper triangle contains matrix U, and the elements below the main diagonal are not modified. Similarly, if IsUpper = False. -- ALGLIB -- 03.02.2014 Sergey Bochkanov *************************************************************************/ public static void spdmatrixcholeskyupdatefixbuf(double[,] a, int n, bool isupper, bool[] fix, ref double[] bufr) { int i = 0; int j = 0; int k = 0; int nfix = 0; int idx = 0; double cs = 0; double sn = 0; double v = 0; double vv = 0; alglib.ap.assert(n>0, "SPDMatrixCholeskyUpdateFixBuf: N<=0"); alglib.ap.assert(alglib.ap.rows(a)>=n, "SPDMatrixCholeskyUpdateFixBuf: Rows(A)=n, "SPDMatrixCholeskyUpdateFixBuf: Cols(A)=n, "SPDMatrixCholeskyUpdateFixBuf: Length(Fix)=0, "SparseCholeskySkyline: N<0"); alglib.ap.assert(sparse.sparsegetnrows(a)>=n, "SparseCholeskySkyline: rows(A)=n, "SparseCholeskySkyline: cols(A)=BANDWIDTH(A1), I-th equation is reduced from // L[I,0]*A1[0] + L[I,1]*A1[1] + ... 
+ L[I,I]*A1[I] = A[I] // to // L[I,JNZ]*A1[JNZ] + ... + L[I,I]*A1[I] = A[I] // where JNZ = max(NReady-BANDWIDTH(A1),I-BANDWIDTH(L[i])) // (JNZ is an index of the firts column where both A and L become // nonzero). // // NOTE: we rely on details of SparseMatrix internal storage format. // This is allowed by SparseMatrix specification. // a12 = 0.0; if( a.didx[nready]>0 ) { banda = a.didx[nready]; for(i=nready-banda; i<=nready-1; i++) { // // Elements of A1[0:I-1] were computed: // * A1[0:NReady-BandA-1] are zero (sparse) // * A1[NReady-BandA:I-1] replaced corresponding elements of A // // Now it is time to get I-th one. // // First, we calculate: // * JNZA - index of the first column where A become nonzero // * JNZL - index of the first column where L become nonzero // * JNZ - index of the first column where both A and L become nonzero // * OffsA - offset of A[JNZ] in A.Vals // * OffsL - offset of L[I,JNZ] in A.Vals // // Then, we solve SUM(A1[j]*L[I,j],j=JNZ..I-1) + A1[I]*L[I,I] = A[I], // with A1[JNZ..I-1] already known, and A1[I] unknown. // jnza = nready-banda; jnzl = i-a.didx[i]; jnz = Math.Max(jnza, jnzl); offsa = a.ridx[nready]+(jnz-jnza); offsl = a.ridx[i]+(jnz-jnzl); v = 0.0; k = i-1-jnz; for(j=0; j<=k; j++) { v = v+a.vals[offsa+j]*a.vals[offsl+j]; } vv = (a.vals[offsa+k+1]-v)/a.vals[offsl+k+1]; a.vals[offsa+k+1] = vv; a12 = a12+vv*vv; } } // // Calculate CHOLESKY(B-A1*A1') // offsa = a.ridx[nready]+a.didx[nready]; v = a.vals[offsa]; if( (double)(v)<=(double)(a12) ) { result = false; return result; } a.vals[offsa] = Math.Sqrt(v-a12); // // Increase size of the updated matrix // apserv.inc(ref nready); } // // transpose if needed // if( isupper ) { sparse.sparsetransposesks(a); } result = true; return result; } /************************************************************************* Sparse Cholesky decomposition: "expert" function. The algorithm computes Cholesky decomposition of a symmetric positive- definite sparse matrix. 
The result is representation of A as A=U^T*U or A=L*L^T Triangular factor L or U is written to separate SparseMatrix structure. If output buffer already contrains enough memory to store L/U, this memory is reused. INPUT PARAMETERS: A - upper or lower triangle of sparse matrix. Matrix can be in any sparse storage format. N - size of matrix A (can be smaller than actual size of A) IsUpper - if IsUpper=True, then A contains an upper triangle of a symmetric matrix, otherwise A contains a lower one. Another triangle is ignored. P0, P1 - integer arrays: * for Ordering=-3 - user-supplied permutation of rows/ columns, which complies to requirements stated in the "OUTPUT PARAMETERS" section. Both P0 and P1 must be initialized by user. * for other values of Ordering - possibly preallocated buffer, which is filled by internally generated permutation. Automatically resized if its size is too small to store data. Ordering- sparse matrix reordering algorithm which is used to reduce fill-in amount: * -3 use ordering supplied by user in P0/P1 * -2 use random ordering * -1 use original order * 0 use best algorithm implemented so far If input matrix is given in SKS format, factorization function ignores Ordering and uses original order of the columns. The idea is that if you already store matrix in SKS format, it is better not to perform costly reordering. Algo - type of algorithm which is used during factorization: * 0 use best algorithm (for SKS input or output matrices Algo=2 is used; otherwise Algo=1 is used) * 1 use CRS-based algorithm * 2 use skyline-based factorization algorithm. This algorithm is a fastest one for low-profile matrices, but requires too much of memory for matrices with large bandwidth. Fmt - desired storage format of the output, as returned by SparseGetMatrixType() function: * 0 for hash-based storage * 1 for CRS * 2 for SKS If you do not know what format to choose, use 1 (CRS). Buf - SparseBuffers structure which is used to store temporaries. 
This function may reuse previously allocated storage, so if you perform repeated factorizations it is beneficial to reuse Buf. C - SparseMatrix structure which can be just some random garbage. In case it contains enough memory to store triangular factors, this memory will be reused. Otherwise, algorithm will automatically allocate enough memory. OUTPUT PARAMETERS: C - the result of factorization, stored in desired format. If IsUpper=True, then the upper triangle contains matrix U, such that (P'*A*P) = U^T*U, where P is a permutation matrix (see below). The elements below the main diagonal are zero. Similarly, if IsUpper = False. In this case L is returned, and we have (P'*A*P) = L*(L^T). P0 - permutation (according to Ordering parameter) which minimizes amount of fill-in: * P0 is array[N] * permutation is applied to A before factorization takes place, i.e. we have U'*U = L*L' = P'*A*P * P0[k]=j means that column/row j of A is moved to k-th position before starting factorization. P1 - permutation P in another format, array[N]: * P1[k]=j means that k-th column/row of A is moved to j-th position RESULT: If the matrix is positive-definite, the function returns True. Otherwise, the function returns False. Contents of C are not determined in such case. NOTE: for performance reasons this function does NOT check that input matrix includes only finite values. It is your responsibility to make sure that there are no infinite or NAN values in the matrix. 
-- ALGLIB routine -- 16.01.2014 Bochkanov Sergey *************************************************************************/ public static bool sparsecholeskyx(sparse.sparsematrix a, int n, bool isupper, ref int[] p0, ref int[] p1, int ordering, int algo, int fmt, sparse.sparsebuffers buf, sparse.sparsematrix c) { bool result = new bool(); int i = 0; int j = 0; int k = 0; int t0 = 0; int t1 = 0; double v = 0; hqrnd.hqrndstate rs = new hqrnd.hqrndstate(); alglib.ap.assert(n>=0, "SparseMatrixCholeskyBuf: N<0"); alglib.ap.assert(sparse.sparsegetnrows(a)>=n, "SparseMatrixCholeskyBuf: rows(A)=n, "SparseMatrixCholeskyBuf: cols(A)=-3 && ordering<=0, "SparseMatrixCholeskyBuf: invalid Ordering parameter"); alglib.ap.assert(algo>=0 && algo<=2, "SparseMatrixCholeskyBuf: invalid Algo parameter"); hqrnd.hqrndrandomize(rs); // // Perform some quick checks. // Because sparse matrices are expensive data structures, these // checks are better to perform during early stages of the factorization. // result = false; if( n<1 ) { return result; } for(i=0; i<=n-1; i++) { if( (double)(sparse.sparsegetdiagonal(a, i))<=(double)(0) ) { return result; } } // // First, determine appropriate ordering: // * for SKS inputs, Ordering=-1 is automatically chosen (overrides user settings) // if( ordering==0 ) { ordering = -1; } if( sparse.sparseissks(a) ) { ordering = -1; } if( ordering==-3 ) { // // User-supplied ordering. // Check its correctness. 
// alglib.ap.assert(alglib.ap.len(p0)>=n, "SparseCholeskyX: user-supplied permutation is too short"); alglib.ap.assert(alglib.ap.len(p1)>=n, "SparseCholeskyX: user-supplied permutation is too short"); for(i=0; i<=n-1; i++) { alglib.ap.assert(p0[i]>=0 && p0[i]=0 && p1[i]=i) || (!isupper && j<=i) ) { i = p1[i]; j = p1[j]; if( (ji && !isupper) ) { apserv.swapi(ref i, ref j); } if( i>j ) { buf.d[i] = Math.Max(buf.d[i], i-j); } else { buf.u[j] = Math.Max(buf.u[j], j-i); } } } sparse.sparsecreatesksbuf(n, n, buf.d, buf.u, c); t0 = 0; t1 = 0; while( sparse.sparseenumerate(a, ref t0, ref t1, ref i, ref j, ref v) ) { if( (isupper && j>=i) || (!isupper && j<=i) ) { i = p1[i]; j = p1[j]; if( (ji && !isupper) ) { apserv.swapi(ref j, ref i); } sparse.sparserewriteexisting(c, i, j, v); } } } result = sparsecholeskyskyline(c, n, isupper); return result; } alglib.ap.assert(false, "SparseCholeskyX: internal error - unexpected algorithm"); return result; } public static void rmatrixlup(ref double[,] a, int m, int n, ref int[] pivots) { double[] tmp = new double[0]; int i = 0; int j = 0; double mx = 0; double v = 0; int i_ = 0; pivots = new int[0]; // // Internal LU decomposition subroutine. // Never call it directly. // alglib.ap.assert(m>0, "RMatrixLUP: incorrect M!"); alglib.ap.assert(n>0, "RMatrixLUP: incorrect N!"); // // Scale matrix to avoid overflows, // decompose it, then scale back. 
//
            mx = 0;
            for(i=0; i<=m-1; i++)
            {
                for(j=0; j<=n-1; j++)
                {
                    mx = Math.Max(mx, Math.Abs(a[i,j]));
                }
            }
            if( (double)(mx)!=(double)(0) )
            {
                v = 1/mx;
                for(i=0; i<=m-1; i++)
                {
                    for(i_=0; i_<=n-1;i_++)
                    {
                        a[i,i_] = v*a[i,i_];
                    }
                }
            }
            pivots = new int[Math.Min(m, n)];
            tmp = new double[2*Math.Max(m, n)];
            rmatrixluprec(ref a, 0, m, n, ref pivots, ref tmp);
            if( (double)(mx)!=(double)(0) )
            {
                v = mx;
                for(i=0; i<=m-1; i++)
                {
                    for(i_=0; i_<=Math.Min(i, n-1);i_++)
                    {
                        a[i,i_] = v*a[i,i_];
                    }
                }
            }
        }


        /*************************************************************************
        Internal complex LU decomposition subroutine (driver for cmatrixluprec).
        Never call it directly.

        The matrix is divided by its largest absolute value before the
        factorization (to avoid overflows). Afterwards entries [i,0..min(i,N-1)]
        of every row are multiplied back by the same value.

        A       -   MxN matrix, overwritten in place
        M, N    -   matrix size, both must be positive
        Pivots  -   reallocated to min(M,N) elements and passed to cmatrixluprec
        *************************************************************************/
        public static void cmatrixlup(ref complex[,] a,
            int m,
            int n,
            ref int[] pivots)
        {
            // The output array is reset before the size checks take place.
            pivots = new int[0];
            alglib.ap.assert(m>0, "CMatrixLUP: incorrect M!");
            alglib.ap.assert(n>0, "CMatrixLUP: incorrect N!");

            // Largest modulus over the whole MxN matrix.
            double amax = 0;
            for(int row=0; row<m; row++)
            {
                for(int col=0; col<n; col++)
                {
                    amax = Math.Max(amax, math.abscomplex(a[row,col]));
                }
            }
            bool rescale = amax!=0;

            // Scale down (skipped for an all-zero matrix).
            if( rescale )
            {
                double down = 1/amax;
                for(int row=0; row<m; row++)
                {
                    for(int col=0; col<n; col++)
                    {
                        a[row,col] = down*a[row,col];
                    }
                }
            }

            // Factorize.
            pivots = new int[Math.Min(m, n)];
            complex[] tmp = new complex[2*Math.Max(m, n)];
            cmatrixluprec(ref a, 0, m, n, ref pivots, ref tmp);

            // Scale back the entries on and below the main diagonal.
            if( rescale )
            {
                for(int row=0; row<m; row++)
                {
                    int lastcol = Math.Min(row, n-1);
                    for(int col=0; col<=lastcol; col++)
                    {
                        a[row,col] = amax*a[row,col];
                    }
                }
            }
        }


        /*************************************************************************
        Internal real LU decomposition subroutine (driver for rmatrixplurec).
        Never call it directly.

        The matrix is divided by its largest absolute value before the
        factorization (to avoid overflows). Afterwards entries [i,i..N-1] of the
        first min(M,N) rows are multiplied back by the same value.

        A       -   MxN matrix, overwritten in place
        M, N    -   matrix size, both must be positive
        Pivots  -   reallocated to min(M,N) elements and passed to rmatrixplurec
        *************************************************************************/
        public static void rmatrixplu(ref double[,] a,
            int m,
            int n,
            ref int[] pivots)
        {
            // The output array is reset before the size checks take place.
            pivots = new int[0];
            alglib.ap.assert(m>0, "RMatrixPLU: incorrect M!");
            alglib.ap.assert(n>0, "RMatrixPLU: incorrect N!");

            // Work storage is allocated up front, before the matrix is touched.
            double[] tmp = new double[2*Math.Max(m, n)];
            pivots = new int[Math.Min(m, n)];

            // Largest absolute value over the whole MxN matrix.
            double amax = 0;
            for(int row=0; row<m; row++)
            {
                for(int col=0; col<n; col++)
                {
                    amax = Math.Max(amax, Math.Abs(a[row,col]));
                }
            }
            bool rescale = amax!=0;

            // Scale down (skipped for an all-zero matrix).
            if( rescale )
            {
                double down = 1/amax;
                for(int row=0; row<m; row++)
                {
                    for(int col=0; col<n; col++)
                    {
                        a[row,col] = down*a[row,col];
                    }
                }
            }

            // Factorize.
            rmatrixplurec(ref a, 0, m, n, ref pivots, ref tmp);

            // Scale back the entries on and above the main diagonal.
            if( rescale )
            {
                int nrows = Math.Min(m, n);
                for(int row=0; row<nrows; row++)
                {
                    for(int col=row; col<n; col++)
                    {
                        a[row,col] = amax*a[row,col];
                    }
                }
            }
        }


        /*************************************************************************
        Internal complex LU decomposition subroutine (driver for cmatrixplurec).
        Never call it directly.

        The matrix is divided by its largest absolute value before the
        factorization (to avoid overflows). Afterwards entries [i,i..N-1] of the
        first min(M,N) rows are multiplied back by the same value.

        A       -   MxN matrix, overwritten in place
        M, N    -   matrix size, both must be positive
        Pivots  -   reallocated to min(M,N) elements and passed to cmatrixplurec
        *************************************************************************/
        public static void cmatrixplu(ref complex[,] a,
            int m,
            int n,
            ref int[] pivots)
        {
            // The output array is reset before the size checks take place.
            pivots = new int[0];
            alglib.ap.assert(m>0, "CMatrixPLU: incorrect M!");
            alglib.ap.assert(n>0, "CMatrixPLU: incorrect N!");

            // Work storage is allocated up front, before the matrix is touched.
            complex[] tmp = new complex[2*Math.Max(m, n)];
            pivots = new int[Math.Min(m, n)];

            // Largest modulus over the whole MxN matrix.
            double amax = 0;
            for(int row=0; row<m; row++)
            {
                for(int col=0; col<n; col++)
                {
                    amax = Math.Max(amax, math.abscomplex(a[row,col]));
                }
            }
            bool rescale = amax!=0;

            // Scale down (skipped for an all-zero matrix). The factor is kept
            // in a complex variable, so the products are complex*complex.
            if( rescale )
            {
                complex down = 1/amax;
                for(int row=0; row<m; row++)
                {
                    for(int col=0; col<n; col++)
                    {
                        a[row,col] = down*a[row,col];
                    }
                }
            }

            // Factorize.
            cmatrixplurec(ref a, 0, m, n, ref pivots, ref tmp);

            // Scale back the entries on and above the main diagonal.
            if( rescale )
            {
                complex up = amax;
                int nrows = Math.Min(m, n);
                for(int row=0; row<nrows; row++)
                {
                    for(int col=row; col<n; col++)
                    {
                        a[row,col] = up*a[row,col];
                    }
                }
            }
        }


        /*************************************************************************
        Recursive computational subroutine for SPDMatrixCholesky.

        INPUT PARAMETERS:
            A       -   matrix given by upper or lower triangle
            Offs    -   offset of diagonal block to decompose
            N       -   diagonal block size
            IsUpper -   what half is given
            Tmp     -   temporary array; allocated by function, if its size is too
                        small; can be reused on subsequent calls.
OUTPUT PARAMETERS:
            A       -   upper (or lower) triangle contains Cholesky decomposition

        RESULT:
            True, on success
            False, on failure

          -- ALGLIB routine --
             15.12.2009
             Bochkanov Sergey
        *************************************************************************/
        public static bool spdmatrixcholeskyrec(ref double[,] a,
            int offs,
            int n,
            bool isupper,
            ref double[] tmp)
        {
            // An empty block is reported as a failure.
            if( n<1 )
            {
                return false;
            }

            // The scratch array must hold at least 2*N elements; it is handed
            // to the level-2 kernel and to the recursive calls below.
            if( alglib.ap.len(tmp)<2*n )
            {
                tmp = new double[2*n];
            }

            // 1x1 block: the factor is the square root of a positive pivot.
            //
            // NOTE: MKL is not used to accelerate the Cholesky basecase because
            //       its cost is negligible when compared to the cost of the
            //       entire decomposition (most time is spent in GEMM and SYRK).
            if( n==1 )
            {
                double pivot = a[offs,offs];
                if( pivot>0 )
                {
                    a[offs,offs] = Math.Sqrt(pivot);
                    return true;
                }
                return false;
            }

            // Small block: delegate to the level-2 kernel.
            if( n<=ablas.ablasblocksize(a) )
            {
                return spdmatrixcholesky2(ref a, offs, n, isupper, ref tmp);
            }

            // General case: split the block in a cache-oblivious manner and
            // factorize the leading part first.
            int headsize = 0;
            int tailsize = 0;
            ablas.ablassplitlength(a, n, ref headsize, ref tailsize);
            if( !spdmatrixcholeskyrec(ref a, offs, headsize, isupper, ref tmp) )
            {
                return false;
            }
            if( tailsize<=0 )
            {
                return true;
            }

            // Update the off-diagonal panel (TRSM) and the trailing diagonal
            // block (SYRK), then factorize the trailing block.
            int tailoffs = offs+headsize;
            if( isupper )
            {
                ablas.rmatrixlefttrsm(headsize, tailsize, a, offs, offs, isupper, false, 1, a, offs, tailoffs);
                ablas.rmatrixsyrk(tailsize, headsize, -1.0, a, offs, tailoffs, 1, 1.0, a, tailoffs, tailoffs, isupper);
            }
            else
            {
                ablas.rmatrixrighttrsm(tailsize, headsize, a, offs, offs, isupper, false, 1, a, tailoffs, offs);
                ablas.rmatrixsyrk(tailsize, headsize, -1.0, a, tailoffs, offs, 0, 1.0, a, tailoffs, tailoffs, isupper);
            }
            return spdmatrixcholeskyrec(ref a, tailoffs, tailsize, isupper, ref tmp);
        }


        /*************************************************************************
        Recurrent complex LU subroutine.
        Never call it directly.
-- ALGLIB routine -- 04.01.2010 Bochkanov Sergey *************************************************************************/ private static void cmatrixluprec(ref complex[,] a, int offs, int m, int n, ref int[] pivots, ref complex[] tmp) { int i = 0; int m1 = 0; int m2 = 0; int i_ = 0; int i1_ = 0; // // Kernel case // if( Math.Min(m, n)<=ablas.ablascomplexblocksize(a) ) { cmatrixlup2(ref a, offs, m, n, ref pivots, ref tmp); return; } // // Preliminary step, make N>=M // // ( A1 ) // A = ( ), where A1 is square // ( A2 ) // // Factorize A1, update A2 // if( m>n ) { cmatrixluprec(ref a, offs, n, n, ref pivots, ref tmp); for(i=0; i<=n-1; i++) { i1_ = (offs+n) - (0); for(i_=0; i_<=m-n-1;i_++) { tmp[i_] = a[i_+i1_,offs+i]; } for(i_=offs+n; i_<=offs+m-1;i_++) { a[i_,offs+i] = a[i_,pivots[offs+i]]; } i1_ = (0) - (offs+n); for(i_=offs+n; i_<=offs+m-1;i_++) { a[i_,pivots[offs+i]] = tmp[i_+i1_]; } } ablas.cmatrixrighttrsm(m-n, n, a, offs, offs, true, true, 0, a, offs+n, offs); return; } // // Non-kernel case // ablas.ablascomplexsplitlength(a, m, ref m1, ref m2); cmatrixluprec(ref a, offs, m1, n, ref pivots, ref tmp); if( m2>0 ) { for(i=0; i<=m1-1; i++) { if( offs+i!=pivots[offs+i] ) { i1_ = (offs+m1) - (0); for(i_=0; i_<=m2-1;i_++) { tmp[i_] = a[i_+i1_,offs+i]; } for(i_=offs+m1; i_<=offs+m-1;i_++) { a[i_,offs+i] = a[i_,pivots[offs+i]]; } i1_ = (0) - (offs+m1); for(i_=offs+m1; i_<=offs+m-1;i_++) { a[i_,pivots[offs+i]] = tmp[i_+i1_]; } } } ablas.cmatrixrighttrsm(m2, m1, a, offs, offs, true, true, 0, a, offs+m1, offs); ablas.cmatrixgemm(m-m1, n-m1, m1, -1.0, a, offs+m1, offs, 0, a, offs, offs+m1, 0, 1.0, a, offs+m1, offs+m1); cmatrixluprec(ref a, offs+m1, m-m1, n-m1, ref pivots, ref tmp); for(i=0; i<=m2-1; i++) { if( offs+m1+i!=pivots[offs+m1+i] ) { i1_ = (offs) - (0); for(i_=0; i_<=m1-1;i_++) { tmp[i_] = a[i_+i1_,offs+m1+i]; } for(i_=offs; i_<=offs+m1-1;i_++) { a[i_,offs+m1+i] = a[i_,pivots[offs+m1+i]]; } i1_ = (0) - (offs); for(i_=offs; i_<=offs+m1-1;i_++) { 
a[i_,pivots[offs+m1+i]] = tmp[i_+i1_]; } } } } } /************************************************************************* Recurrent real LU subroutine. Never call it directly. -- ALGLIB routine -- 04.01.2010 Bochkanov Sergey *************************************************************************/ private static void rmatrixluprec(ref double[,] a, int offs, int m, int n, ref int[] pivots, ref double[] tmp) { int i = 0; int m1 = 0; int m2 = 0; int i_ = 0; int i1_ = 0; // // Kernel case // if( Math.Min(m, n)<=ablas.ablasblocksize(a) ) { rmatrixlup2(ref a, offs, m, n, ref pivots, ref tmp); return; } // // Preliminary step, make N>=M // // ( A1 ) // A = ( ), where A1 is square // ( A2 ) // // Factorize A1, update A2 // if( m>n ) { rmatrixluprec(ref a, offs, n, n, ref pivots, ref tmp); for(i=0; i<=n-1; i++) { if( offs+i!=pivots[offs+i] ) { i1_ = (offs+n) - (0); for(i_=0; i_<=m-n-1;i_++) { tmp[i_] = a[i_+i1_,offs+i]; } for(i_=offs+n; i_<=offs+m-1;i_++) { a[i_,offs+i] = a[i_,pivots[offs+i]]; } i1_ = (0) - (offs+n); for(i_=offs+n; i_<=offs+m-1;i_++) { a[i_,pivots[offs+i]] = tmp[i_+i1_]; } } } ablas.rmatrixrighttrsm(m-n, n, a, offs, offs, true, true, 0, a, offs+n, offs); return; } // // Non-kernel case // ablas.ablassplitlength(a, m, ref m1, ref m2); rmatrixluprec(ref a, offs, m1, n, ref pivots, ref tmp); if( m2>0 ) { for(i=0; i<=m1-1; i++) { if( offs+i!=pivots[offs+i] ) { i1_ = (offs+m1) - (0); for(i_=0; i_<=m2-1;i_++) { tmp[i_] = a[i_+i1_,offs+i]; } for(i_=offs+m1; i_<=offs+m-1;i_++) { a[i_,offs+i] = a[i_,pivots[offs+i]]; } i1_ = (0) - (offs+m1); for(i_=offs+m1; i_<=offs+m-1;i_++) { a[i_,pivots[offs+i]] = tmp[i_+i1_]; } } } ablas.rmatrixrighttrsm(m2, m1, a, offs, offs, true, true, 0, a, offs+m1, offs); ablas.rmatrixgemm(m-m1, n-m1, m1, -1.0, a, offs+m1, offs, 0, a, offs, offs+m1, 0, 1.0, a, offs+m1, offs+m1); rmatrixluprec(ref a, offs+m1, m-m1, n-m1, ref pivots, ref tmp); for(i=0; i<=m2-1; i++) { if( offs+m1+i!=pivots[offs+m1+i] ) { i1_ = (offs) - (0); for(i_=0; 
i_<=m1-1;i_++) { tmp[i_] = a[i_+i1_,offs+m1+i]; } for(i_=offs; i_<=offs+m1-1;i_++) { a[i_,offs+m1+i] = a[i_,pivots[offs+m1+i]]; } i1_ = (0) - (offs); for(i_=offs; i_<=offs+m1-1;i_++) { a[i_,pivots[offs+m1+i]] = tmp[i_+i1_]; } } } } } /************************************************************************* Recurrent complex LU subroutine. Never call it directly. -- ALGLIB routine -- 04.01.2010 Bochkanov Sergey *************************************************************************/ private static void cmatrixplurec(ref complex[,] a, int offs, int m, int n, ref int[] pivots, ref complex[] tmp) { int i = 0; int n1 = 0; int n2 = 0; int i_ = 0; int i1_ = 0; // // Kernel case // if( Math.Min(m, n)<=ablas.ablascomplexblocksize(a) ) { cmatrixplu2(ref a, offs, m, n, ref pivots, ref tmp); return; } // // Preliminary step, make M>=N. // // A = (A1 A2), where A1 is square // Factorize A1, update A2 // if( n>m ) { cmatrixplurec(ref a, offs, m, m, ref pivots, ref tmp); for(i=0; i<=m-1; i++) { i1_ = (offs+m) - (0); for(i_=0; i_<=n-m-1;i_++) { tmp[i_] = a[offs+i,i_+i1_]; } for(i_=offs+m; i_<=offs+n-1;i_++) { a[offs+i,i_] = a[pivots[offs+i],i_]; } i1_ = (0) - (offs+m); for(i_=offs+m; i_<=offs+n-1;i_++) { a[pivots[offs+i],i_] = tmp[i_+i1_]; } } ablas.cmatrixlefttrsm(m, n-m, a, offs, offs, false, true, 0, a, offs, offs+m); return; } // // Non-kernel case // ablas.ablascomplexsplitlength(a, n, ref n1, ref n2); cmatrixplurec(ref a, offs, m, n1, ref pivots, ref tmp); if( n2>0 ) { for(i=0; i<=n1-1; i++) { if( offs+i!=pivots[offs+i] ) { i1_ = (offs+n1) - (0); for(i_=0; i_<=n2-1;i_++) { tmp[i_] = a[offs+i,i_+i1_]; } for(i_=offs+n1; i_<=offs+n-1;i_++) { a[offs+i,i_] = a[pivots[offs+i],i_]; } i1_ = (0) - (offs+n1); for(i_=offs+n1; i_<=offs+n-1;i_++) { a[pivots[offs+i],i_] = tmp[i_+i1_]; } } } ablas.cmatrixlefttrsm(n1, n2, a, offs, offs, false, true, 0, a, offs, offs+n1); ablas.cmatrixgemm(m-n1, n-n1, n1, -1.0, a, offs+n1, offs, 0, a, offs, offs+n1, 0, 1.0, a, offs+n1, offs+n1); 
cmatrixplurec(ref a, offs+n1, m-n1, n-n1, ref pivots, ref tmp); for(i=0; i<=n2-1; i++) { if( offs+n1+i!=pivots[offs+n1+i] ) { i1_ = (offs) - (0); for(i_=0; i_<=n1-1;i_++) { tmp[i_] = a[offs+n1+i,i_+i1_]; } for(i_=offs; i_<=offs+n1-1;i_++) { a[offs+n1+i,i_] = a[pivots[offs+n1+i],i_]; } i1_ = (0) - (offs); for(i_=offs; i_<=offs+n1-1;i_++) { a[pivots[offs+n1+i],i_] = tmp[i_+i1_]; } } } } } /************************************************************************* Recurrent real LU subroutine. Never call it directly. -- ALGLIB routine -- 04.01.2010 Bochkanov Sergey *************************************************************************/ private static void rmatrixplurec(ref double[,] a, int offs, int m, int n, ref int[] pivots, ref double[] tmp) { int i = 0; int n1 = 0; int n2 = 0; int i_ = 0; int i1_ = 0; // // Basecases // if( ablasmkl.rmatrixplumkl(ref a, offs, m, n, ref pivots) ) { return; } if( Math.Min(m, n)<=ablas.ablasblocksize(a) ) { rmatrixplu2(ref a, offs, m, n, ref pivots, ref tmp); return; } // // Preliminary step, make M>=N. 
// // A = (A1 A2), where A1 is square // Factorize A1, update A2 // if( n>m ) { rmatrixplurec(ref a, offs, m, m, ref pivots, ref tmp); for(i=0; i<=m-1; i++) { i1_ = (offs+m) - (0); for(i_=0; i_<=n-m-1;i_++) { tmp[i_] = a[offs+i,i_+i1_]; } for(i_=offs+m; i_<=offs+n-1;i_++) { a[offs+i,i_] = a[pivots[offs+i],i_]; } i1_ = (0) - (offs+m); for(i_=offs+m; i_<=offs+n-1;i_++) { a[pivots[offs+i],i_] = tmp[i_+i1_]; } } ablas.rmatrixlefttrsm(m, n-m, a, offs, offs, false, true, 0, a, offs, offs+m); return; } // // Non-kernel case // ablas.ablassplitlength(a, n, ref n1, ref n2); rmatrixplurec(ref a, offs, m, n1, ref pivots, ref tmp); if( n2>0 ) { for(i=0; i<=n1-1; i++) { if( offs+i!=pivots[offs+i] ) { i1_ = (offs+n1) - (0); for(i_=0; i_<=n2-1;i_++) { tmp[i_] = a[offs+i,i_+i1_]; } for(i_=offs+n1; i_<=offs+n-1;i_++) { a[offs+i,i_] = a[pivots[offs+i],i_]; } i1_ = (0) - (offs+n1); for(i_=offs+n1; i_<=offs+n-1;i_++) { a[pivots[offs+i],i_] = tmp[i_+i1_]; } } } ablas.rmatrixlefttrsm(n1, n2, a, offs, offs, false, true, 0, a, offs, offs+n1); ablas.rmatrixgemm(m-n1, n-n1, n1, -1.0, a, offs+n1, offs, 0, a, offs, offs+n1, 0, 1.0, a, offs+n1, offs+n1); rmatrixplurec(ref a, offs+n1, m-n1, n-n1, ref pivots, ref tmp); for(i=0; i<=n2-1; i++) { if( offs+n1+i!=pivots[offs+n1+i] ) { i1_ = (offs) - (0); for(i_=0; i_<=n1-1;i_++) { tmp[i_] = a[offs+n1+i,i_+i1_]; } for(i_=offs; i_<=offs+n1-1;i_++) { a[offs+n1+i,i_] = a[pivots[offs+n1+i],i_]; } i1_ = (0) - (offs); for(i_=offs; i_<=offs+n1-1;i_++) { a[pivots[offs+n1+i],i_] = tmp[i_+i1_]; } } } } } /************************************************************************* Complex LUP kernel -- ALGLIB routine -- 10.01.2010 Bochkanov Sergey *************************************************************************/ private static void cmatrixlup2(ref complex[,] a, int offs, int m, int n, ref int[] pivots, ref complex[] tmp) { int i = 0; int j = 0; int jp = 0; complex s = 0; int i_ = 0; int i1_ = 0; // // Quick return if possible // if( m==0 || n==0 ) { 
return; } // // main cycle // for(j=0; j<=Math.Min(m-1, n-1); j++) { // // Find pivot, swap columns // jp = j; for(i=j+1; i<=n-1; i++) { if( (double)(math.abscomplex(a[offs+j,offs+i]))>(double)(math.abscomplex(a[offs+j,offs+jp])) ) { jp = i; } } pivots[offs+j] = offs+jp; if( jp!=j ) { i1_ = (offs) - (0); for(i_=0; i_<=m-1;i_++) { tmp[i_] = a[i_+i1_,offs+j]; } for(i_=offs; i_<=offs+m-1;i_++) { a[i_,offs+j] = a[i_,offs+jp]; } i1_ = (0) - (offs); for(i_=offs; i_<=offs+m-1;i_++) { a[i_,offs+jp] = tmp[i_+i1_]; } } // // LU decomposition of 1x(N-J) matrix // if( a[offs+j,offs+j]!=0 && j+1<=n-1 ) { s = 1/a[offs+j,offs+j]; for(i_=offs+j+1; i_<=offs+n-1;i_++) { a[offs+j,i_] = s*a[offs+j,i_]; } } // // Update trailing (M-J-1)x(N-J-1) matrix // if( j(double)(Math.Abs(a[offs+j,offs+jp])) ) { jp = i; } } pivots[offs+j] = offs+jp; if( jp!=j ) { i1_ = (offs) - (0); for(i_=0; i_<=m-1;i_++) { tmp[i_] = a[i_+i1_,offs+j]; } for(i_=offs; i_<=offs+m-1;i_++) { a[i_,offs+j] = a[i_,offs+jp]; } i1_ = (0) - (offs); for(i_=offs; i_<=offs+m-1;i_++) { a[i_,offs+jp] = tmp[i_+i1_]; } } // // LU decomposition of 1x(N-J) matrix // if( (double)(a[offs+j,offs+j])!=(double)(0) && j+1<=n-1 ) { s = 1/a[offs+j,offs+j]; for(i_=offs+j+1; i_<=offs+n-1;i_++) { a[offs+j,i_] = s*a[offs+j,i_]; } } // // Update trailing (M-J-1)x(N-J-1) matrix // if( j(double)(math.abscomplex(a[offs+jp,offs+j])) ) { jp = i; } } pivots[offs+j] = offs+jp; if( a[offs+jp,offs+j]!=0 ) { // //Apply the interchange to rows // if( jp!=j ) { for(i=0; i<=n-1; i++) { s = a[offs+j,offs+i]; a[offs+j,offs+i] = a[offs+jp,offs+i]; a[offs+jp,offs+i] = s; } } // //Compute elements J+1:M of J-th column. 
// if( j+1<=m-1 ) { s = 1/a[offs+j,offs+j]; for(i_=offs+j+1; i_<=offs+m-1;i_++) { a[i_,offs+j] = s*a[i_,offs+j]; } } } if( j(double)(Math.Abs(a[offs+jp,offs+j])) ) { jp = i; } } pivots[offs+j] = offs+jp; if( (double)(a[offs+jp,offs+j])!=(double)(0) ) { // //Apply the interchange to rows // if( jp!=j ) { for(i=0; i<=n-1; i++) { s = a[offs+j,offs+i]; a[offs+j,offs+i] = a[offs+jp,offs+i]; a[offs+jp,offs+i] = s; } } // //Compute elements J+1:M of J-th column. // if( j+1<=m-1 ) { s = 1/a[offs+j,offs+j]; for(i_=offs+j+1; i_<=offs+m-1;i_++) { a[i_,offs+j] = s*a[i_,offs+j]; } } } if( j256. // if( n==1 ) { if( (double)(a[offs,offs].x)>(double)(0) ) { a[offs,offs] = Math.Sqrt(a[offs,offs].x); result = true; } else { result = false; } return result; } if( n<=ablas.ablascomplexblocksize(a) ) { result = hpdmatrixcholesky2(ref a, offs, n, isupper, ref tmp); return result; } // // general case: split task in cache-oblivious manner // result = true; ablas.ablascomplexsplitlength(a, n, ref n1, ref n2); result = hpdmatrixcholeskyrec(ref a, offs, n1, isupper, ref tmp); if( !result ) { return result; } if( n2>0 ) { if( isupper ) { ablas.cmatrixlefttrsm(n1, n2, a, offs, offs, isupper, false, 2, a, offs, offs+n1); ablas.cmatrixherk(n2, n1, -1.0, a, offs, offs+n1, 2, 1.0, a, offs+n1, offs+n1, isupper); } else { ablas.cmatrixrighttrsm(n2, n1, a, offs, offs, isupper, false, 2, a, offs+n1, offs); ablas.cmatrixherk(n2, n1, -1.0, a, offs+n1, offs, 0, 1.0, a, offs+n1, offs+n1, isupper); } result = hpdmatrixcholeskyrec(ref a, offs+n1, n2, isupper, ref tmp); if( !result ) { return result; } } return result; } /************************************************************************* Level-2 Hermitian Cholesky subroutine. -- LAPACK routine (version 3.0) -- Univ. of Tennessee, Univ. 
of California Berkeley, NAG Ltd., Courant Institute, Argonne National Lab, and Rice University February 29, 1992 *************************************************************************/ private static bool hpdmatrixcholesky2(ref complex[,] aaa, int offs, int n, bool isupper, ref complex[] tmp) { bool result = new bool(); int i = 0; int j = 0; double ajj = 0; complex v = 0; double r = 0; int i_ = 0; int i1_ = 0; result = true; if( n<0 ) { result = false; return result; } // // Quick return if possible // if( n==0 ) { return result; } if( isupper ) { // // Compute the Cholesky factorization A = U'*U. // for(j=0; j<=n-1; j++) { // // Compute U(J,J) and test for non-positive-definiteness. // v = 0.0; for(i_=offs; i_<=offs+j-1;i_++) { v += math.conj(aaa[i_,offs+j])*aaa[i_,offs+j]; } ajj = (aaa[offs+j,offs+j]-v).x; if( (double)(ajj)<=(double)(0) ) { aaa[offs+j,offs+j] = ajj; result = false; return result; } ajj = Math.Sqrt(ajj); aaa[offs+j,offs+j] = ajj; // // Compute elements J+1:N-1 of row J. // if( j0 ) { i1_ = (offs) - (0); for(i_=0; i_<=j-1;i_++) { tmp[i_] = -math.conj(aaa[i_+i1_,offs+j]); } ablas.cmatrixmv(n-j-1, j, aaa, offs, offs+j+1, 1, tmp, 0, ref tmp, n); i1_ = (n) - (offs+j+1); for(i_=offs+j+1; i_<=offs+n-1;i_++) { aaa[offs+j,i_] = aaa[offs+j,i_] + tmp[i_+i1_]; } } r = 1/ajj; for(i_=offs+j+1; i_<=offs+n-1;i_++) { aaa[offs+j,i_] = r*aaa[offs+j,i_]; } } } } else { // // Compute the Cholesky factorization A = L*L'. // for(j=0; j<=n-1; j++) { // // Compute L(J+1,J+1) and test for non-positive-definiteness. // v = 0.0; for(i_=offs; i_<=offs+j-1;i_++) { v += math.conj(aaa[offs+j,i_])*aaa[offs+j,i_]; } ajj = (aaa[offs+j,offs+j]-v).x; if( (double)(ajj)<=(double)(0) ) { aaa[offs+j,offs+j] = ajj; result = false; return result; } ajj = Math.Sqrt(ajj); aaa[offs+j,offs+j] = ajj; // // Compute elements J+1:N of column J. 
// if( j0 ) { i1_ = (offs) - (0); for(i_=0; i_<=j-1;i_++) { tmp[i_] = math.conj(aaa[offs+j,i_+i1_]); } ablas.cmatrixmv(n-j-1, j, aaa, offs+j+1, offs, 0, tmp, 0, ref tmp, n); for(i=0; i<=n-j-2; i++) { aaa[offs+j+1+i,offs+j] = (aaa[offs+j+1+i,offs+j]-tmp[n+i])/ajj; } } else { for(i=0; i<=n-j-2; i++) { aaa[offs+j+1+i,offs+j] = aaa[offs+j+1+i,offs+j]/ajj; } } } } } return result; } /************************************************************************* Level-2 Cholesky subroutine -- LAPACK routine (version 3.0) -- Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., Courant Institute, Argonne National Lab, and Rice University February 29, 1992 *************************************************************************/ private static bool spdmatrixcholesky2(ref double[,] aaa, int offs, int n, bool isupper, ref double[] tmp) { bool result = new bool(); int i = 0; int j = 0; double ajj = 0; double v = 0; double r = 0; int i_ = 0; int i1_ = 0; result = true; if( n<0 ) { result = false; return result; } // // Quick return if possible // if( n==0 ) { return result; } if( isupper ) { // // Compute the Cholesky factorization A = U'*U. // for(j=0; j<=n-1; j++) { // // Compute U(J,J) and test for non-positive-definiteness. // v = 0.0; for(i_=offs; i_<=offs+j-1;i_++) { v += aaa[i_,offs+j]*aaa[i_,offs+j]; } ajj = aaa[offs+j,offs+j]-v; if( (double)(ajj)<=(double)(0) ) { aaa[offs+j,offs+j] = ajj; result = false; return result; } ajj = Math.Sqrt(ajj); aaa[offs+j,offs+j] = ajj; // // Compute elements J+1:N-1 of row J. // if( j0 ) { i1_ = (offs) - (0); for(i_=0; i_<=j-1;i_++) { tmp[i_] = -aaa[i_+i1_,offs+j]; } ablas.rmatrixmv(n-j-1, j, aaa, offs, offs+j+1, 1, tmp, 0, ref tmp, n); i1_ = (n) - (offs+j+1); for(i_=offs+j+1; i_<=offs+n-1;i_++) { aaa[offs+j,i_] = aaa[offs+j,i_] + tmp[i_+i1_]; } } r = 1/ajj; for(i_=offs+j+1; i_<=offs+n-1;i_++) { aaa[offs+j,i_] = r*aaa[offs+j,i_]; } } } } else { // // Compute the Cholesky factorization A = L*L'. 
// for(j=0; j<=n-1; j++) { // // Compute L(J+1,J+1) and test for non-positive-definiteness. // v = 0.0; for(i_=offs; i_<=offs+j-1;i_++) { v += aaa[offs+j,i_]*aaa[offs+j,i_]; } ajj = aaa[offs+j,offs+j]-v; if( (double)(ajj)<=(double)(0) ) { aaa[offs+j,offs+j] = ajj; result = false; return result; } ajj = Math.Sqrt(ajj); aaa[offs+j,offs+j] = ajj; // // Compute elements J+1:N of column J. // if( j0 ) { i1_ = (offs) - (0); for(i_=0; i_<=j-1;i_++) { tmp[i_] = aaa[offs+j,i_+i1_]; } ablas.rmatrixmv(n-j-1, j, aaa, offs+j+1, offs, 0, tmp, 0, ref tmp, n); for(i=0; i<=n-j-2; i++) { aaa[offs+j+1+i,offs+j] = (aaa[offs+j+1+i,offs+j]-tmp[n+i])/ajj; } } else { for(i=0; i<=n-j-2; i++) { aaa[offs+j+1+i,offs+j] = aaa[offs+j+1+i,offs+j]/ajj; } } } } } return result; } } public class rcond { /************************************************************************* Estimate of a matrix condition number (1-norm) The algorithm calculates a lower bound of the condition number. In this case, the algorithm does not return a lower bound of the condition number, but an inverse number (to avoid an overflow in case of a singular matrix). Input parameters: A - matrix. Array whose indexes range within [0..N-1, 0..N-1]. N - size of matrix A. Result: 1/LowerBound(cond(A)) NOTE: if k(A) is very large, then matrix is assumed degenerate, k(A)=INF, 0.0 is returned in such cases. 
*************************************************************************/
        public static double rmatrixrcond1(double[,] a,
            int n)
        {
            // Work on a private copy: the LU factorization below overwrites it.
            a = (double[,])a.Clone();
            alglib.ap.assert(n>=1, "RMatrixRCond1: N<1!");

            // 1-norm of A: the largest column sum of absolute values.
            // A freshly created array is zero-filled, so the accumulators
            // start from 0.
            double[] colsums = new double[n];
            for(int row=0; row<n; row++)
            {
                for(int col=0; col<n; col++)
                {
                    colsums[col] += Math.Abs(a[row,col]);
                }
            }
            double anorm = 0;
            for(int col=0; col<n; col++)
            {
                anorm = Math.Max(anorm, colsums[col]);
            }

            // Factorize the copy and let the LU-based estimator do the rest.
            int[] pivots = new int[0];
            trfac.rmatrixlu(ref a, n, n, ref pivots);
            double rc = 0;
            rmatrixrcondluinternal(a, n, true, true, anorm, ref rc);
            return rc;
        }


        /*************************************************************************
        Estimate of a matrix condition number (infinity-norm).

        The algorithm calculates a lower bound of the condition number. In this case,
        the algorithm does not return a lower bound of the condition number, but an
        inverse number (to avoid an overflow in case of a singular matrix).

        Input parameters:
            A   -   matrix. Array whose indexes range within [0..N-1, 0..N-1].
            N   -   size of matrix A.

        Result: 1/LowerBound(cond(A))

        NOTE:
            if k(A) is very large, then matrix is  assumed  degenerate,  k(A)=INF,
            0.0 is returned in such cases.
        *************************************************************************/
        public static double rmatrixrcondinf(double[,] a,
            int n)
        {
            // Work on a private copy: the LU factorization below overwrites it.
            a = (double[,])a.Clone();
            alglib.ap.assert(n>=1, "RMatrixRCondInf: N<1!");

            // Infinity-norm of A: the largest row sum of absolute values.
            // "work" doubles as the row accumulator here and as the by-ref
            // argument of the estimator below, so it enters that call holding
            // the last row sum.
            double work = 0;
            double anorm = 0;
            for(int row=0; row<n; row++)
            {
                work = 0;
                for(int col=0; col<n; col++)
                {
                    work += Math.Abs(a[row,col]);
                }
                anorm = Math.Max(anorm, work);
            }

            // Factorize the copy and let the LU-based estimator do the rest.
            int[] pivots = new int[0];
            trfac.rmatrixlu(ref a, n, n, ref pivots);
            rmatrixrcondluinternal(a, n, false, true, anorm, ref work);
            return work;
        }


        /*************************************************************************
        Condition number estimate of a symmetric positive definite matrix.

        The algorithm calculates a lower bound of the condition number.
In this case, the algorithm does not return a lower bound of the condition
        number, but an inverse number (to avoid an overflow in case of a singular
        matrix).

        It should be noted that 1-norm and inf-norm of condition numbers of symmetric
        matrices are equal, so the algorithm doesn't take into account the
        differences between these types of norms.

        Input parameters:
            A       -   symmetric positive definite matrix which is given by its
                        upper or lower triangle depending on the value of
                        IsUpper. Array with elements [0..N-1, 0..N-1].
            N       -   size of matrix A.
            IsUpper -   storage format.

        Result:
            1/LowerBound(cond(A)), if matrix A is positive definite,
           -1, if matrix A is not positive definite, and its condition number
            could not be found by this algorithm.

        NOTE:
            if k(A) is very large, then matrix is  assumed  degenerate,  k(A)=INF,
            0.0 is returned in such cases.
        *************************************************************************/
        public static double spdmatrixrcond(double[,] a,
            int n,
            bool isupper)
        {
            // Work on a private copy: the Cholesky factorization overwrites it.
            a = (double[,])a.Clone();

            // Norm of the full symmetric matrix computed from the stored
            // triangle only: an off-diagonal entry [i,j] contributes to both
            // sum i and sum j, a diagonal entry contributes once.
            // A freshly created array is zero-filled.
            double[] sums = new double[n];
            for(int row=0; row<=n-1; row++)
            {
                int first = isupper ? row : 0;
                int last = isupper ? n-1 : row;
                for(int col=first; col<=last; col++)
                {
                    double mag = Math.Abs(a[row,col]);
                    sums[row] += mag;
                    if( col!=row )
                    {
                        sums[col] += mag;
                    }
                }
            }
            double anorm = 0;
            for(int k=0; k<=n-1; k++)
            {
                anorm = Math.Max(anorm, sums[k]);
            }

            // A failed factorization is reported as -1.
            if( !trfac.spdmatrixcholesky(ref a, n, isupper) )
            {
                return -1;
            }
            double rc = 0;
            spdmatrixrcondcholeskyinternal(a, n, isupper, true, anorm, ref rc);
            return rc;
        }


        /*************************************************************************
        Triangular matrix: estimate of a condition number (1-norm)

        The algorithm calculates a lower bound of the condition number.
In this case, the algorithm does not return a lower bound of the condition
        number, but an inverse number (to avoid an overflow in case of a singular
        matrix).

        Input parameters:
            A       -   matrix. Array[0..N-1, 0..N-1].
            N       -   size of A.
            IsUpper -   True, if the matrix is upper triangular.
            IsUnit  -   True, if the matrix has a unit diagonal.

        Result: 1/LowerBound(cond(A))

        NOTE:
            if k(A) is very large, then matrix is  assumed  degenerate,  k(A)=INF,
            0.0 is returned in such cases.
        *************************************************************************/
        public static double rmatrixtrrcond1(double[,] a,
            int n,
            bool isupper,
            bool isunit)
        {
            alglib.ap.assert(n>=1, "RMatrixTRRCond1: N<1!");

            // 1-norm of the triangular matrix: the largest column sum of
            // absolute values. Only the triangle selected by IsUpper is read;
            // with IsUnit the stored diagonal is ignored and counted as 1.
            // A freshly created array is zero-filled.
            double[] colsums = new double[n];
            for(int row=0; row<=n-1; row++)
            {
                // Strictly off-diagonal part of this row.
                int first = isupper ? row+1 : 0;
                int last = isupper ? n-1 : row-1;
                for(int col=first; col<=last; col++)
                {
                    colsums[col] += Math.Abs(a[row,col]);
                }

                // Diagonal entry.
                colsums[row] += isunit ? 1 : Math.Abs(a[row,row]);
            }
            double anorm = 0;
            for(int col=0; col<=n-1; col++)
            {
                anorm = Math.Max(anorm, colsums[col]);
            }

            // No factorization is needed for a triangular matrix (hence no
            // pivot array); the triangular estimator works on A directly.
            double rc = 0;
            rmatrixrcondtrinternal(a, n, isupper, isunit, true, anorm, ref rc);
            return rc;
        }


        /*************************************************************************
        Triangular matrix: estimate of a matrix condition number (infinity-norm).

        The algorithm calculates a lower bound of the condition number. In this case,
        the algorithm does not return a lower bound of the condition number, but an
        inverse number (to avoid an overflow in case of a singular matrix).

        Input parameters:
            A   -   matrix. Array whose indexes range within [0..N-1, 0..N-1].
            N   -   size of matrix A.
            IsUpper -   True, if the matrix is upper triangular.
            IsUnit  -   True, if the matrix has a unit diagonal.

        Result: 1/LowerBound(cond(A))

        NOTE:
            if k(A) is very large, then matrix is  assumed  degenerate,  k(A)=INF,
            0.0 is returned in such cases.
*************************************************************************/
    public static double rmatrixtrrcondinf(double[,] a, int n, bool isupper, bool isunit)
    {
        alglib.ap.assert(n>=1, "RMatrixTRRCondInf: N<1!");

        // Largest row sum of absolute values over the referenced triangle;
        // a unit diagonal contributes 1 without reading a[i,i].
        double norm = 0;
        for(int row=0; row<n; row++)
        {
            int first = isupper ? row+1 : 0;
            int last = isupper ? n-1 : row-1;
            double rowsum = 0;
            for(int col=first; col<=last; col++)
            {
                rowsum += Math.Abs(a[row,col]);
            }
            rowsum += isunit ? 1 : Math.Abs(a[row,row]);
            norm = Math.Max(norm, rowsum);
        }

        double rcond = 0;
        rmatrixrcondtrinternal(a, n, isupper, isunit, false, norm, ref rcond);
        return rcond;
    }


    /*************************************************************************
    Condition number estimate of a Hermitian positive definite matrix.

    The algorithm calculates a lower bound of the condition number. In this case,
    the algorithm does not return a lower bound of the condition number, but an
    inverse number (to avoid an overflow in case of a singular matrix).

    It should be noted that 1-norm and inf-norm of condition numbers of symmetric
    matrices are equal, so the algorithm doesn't take into account the
    differences between these types of norms.

    Input parameters:
        A       -   Hermitian positive definite matrix which is given by its
                    upper or lower triangle depending on the value of
                    IsUpper. Array with elements [0..N-1, 0..N-1].
        N       -   size of matrix A.
        IsUpper -   storage format.

    Result:
        1/LowerBound(cond(A)), if matrix A is positive definite,
       -1, if matrix A is not positive definite, and its condition number
        could not be found by this algorithm.

    NOTE:
        if k(A) is very large, then matrix is assumed degenerate, k(A)=INF,
        0.0 is returned in such cases.
*************************************************************************/
    public static double hpdmatrixrcond(complex[,] a, int n, bool isupper)
    {
        // The factorization below receives the matrix by reference, so it is
        // run on a private copy and the caller's array is left alone.
        a = (complex[,])a.Clone();

        // Accumulate sums of moduli from the stored triangle only: every
        // off-diagonal entry is counted for both its row and its column.
        double[] sums = new double[n];
        for(int row=0; row<n; row++)
        {
            int first = isupper ? row : 0;
            int last = isupper ? n-1 : row;
            for(int col=first; col<=last; col++)
            {
                double magnitude = math.abscomplex(a[row,col]);
                sums[row] += magnitude;
                if( col!=row )
                {
                    sums[col] += magnitude;
                }
            }
        }

        double norm = 0;
        foreach(double s in sums)
        {
            norm = Math.Max(norm, s);
        }

        // A failed Cholesky factorization is reported as -1.
        if( !trfac.hpdmatrixcholesky(ref a, n, isupper) )
        {
            return -1;
        }
        double rcond = 0;
        hpdmatrixrcondcholeskyinternal(a, n, isupper, true, norm, ref rcond);
        return rcond;
    }


    /*************************************************************************
    Estimate of a matrix condition number (1-norm)

    The algorithm calculates a lower bound of the condition number. In this case,
    the algorithm does not return a lower bound of the condition number, but an
    inverse number (to avoid an overflow in case of a singular matrix).

    Input parameters:
        A   -   matrix. Array whose indexes range within [0..N-1, 0..N-1].
        N   -   size of matrix A.

    Result: 1/LowerBound(cond(A))

    NOTE:
        if k(A) is very large, then matrix is assumed degenerate, k(A)=INF,
        0.0 is returned in such cases.
*************************************************************************/
    public static double cmatrixrcond1(complex[,] a, int n)
    {
        // Copy first (the LU factorization below takes the matrix by
        // reference), then validate N - same order as before.
        a = (complex[,])a.Clone();
        alglib.ap.assert(n>=1, "CMatrixRCond1: N<1!");

        // 1-norm: largest column sum of moduli.
        double[] colsum = new double[n];
        for(int row=0; row<n; row++)
        {
            for(int col=0; col<n; col++)
            {
                colsum[col] += math.abscomplex(a[row,col]);
            }
        }
        double norm = 0;
        foreach(double s in colsum)
        {
            norm = Math.Max(norm, s);
        }

        // The pivot array is required by the LU call but is not used afterwards.
        int[] pivots = new int[0];
        trfac.cmatrixlu(ref a, n, n, ref pivots);

        double rcond = 0;
        cmatrixrcondluinternal(a, n, true, true, norm, ref rcond);
        return rcond;
    }


    /*************************************************************************
    Estimate of a matrix condition number (infinity-norm).

    The algorithm calculates a lower bound of the condition number. In this case,
    the algorithm does not return a lower bound of the condition number, but an
    inverse number (to avoid an overflow in case of a singular matrix).

    Input parameters:
        A   -   matrix. Array whose indexes range within [0..N-1, 0..N-1].
        N   -   size of matrix A.

    Result: 1/LowerBound(cond(A))

    NOTE:
        if k(A) is very large, then matrix is assumed degenerate, k(A)=INF,
        0.0 is returned in such cases.
*************************************************************************/
    public static double cmatrixrcondinf(complex[,] a, int n)
    {
        // Copy first (the LU factorization below takes the matrix by
        // reference), then validate N - same order as before.
        a = (complex[,])a.Clone();
        alglib.ap.assert(n>=1, "CMatrixRCondInf: N<1!");

        // Infinity-norm: largest row sum of moduli.
        double norm = 0;
        for(int row=0; row<n; row++)
        {
            double rowsum = 0;
            for(int col=0; col<n; col++)
            {
                rowsum += math.abscomplex(a[row,col]);
            }
            norm = Math.Max(norm, rowsum);
        }

        // The pivot array is required by the LU call but is not used afterwards.
        int[] pivots = new int[0];
        trfac.cmatrixlu(ref a, n, n, ref pivots);

        double rcond = 0;
        cmatrixrcondluinternal(a, n, false, true, norm, ref rcond);
        return rcond;
    }


    /*************************************************************************
    Estimate of the condition number of a matrix given by its LU decomposition
    (1-norm).

    The routine does not return the lower bound of the condition number itself
    but its reciprocal, which avoids overflow for singular matrices. The norm
    of the original matrix is not supplied here; the internal routine is asked
    to estimate it from the factors.

    Input parameters:
        LUA -   LU decomposition of a matrix in compact form. Output of
                the RMatrixLU subroutine.
        N   -   size of matrix A.

    Result: 1/LowerBound(cond(A))

    NOTE:
        if k(A) is very large, then matrix is assumed degenerate, k(A)=INF,
        0.0 is returned in such cases.
    *************************************************************************/
    public static double rmatrixlurcond1(double[,] lua, int n)
    {
        double rcond = 0;
        rmatrixrcondluinternal(lua, n, true, false, 0, ref rcond);
        return rcond;
    }


    /*************************************************************************
    Estimate of the condition number of a matrix given by its LU decomposition
    (infinity norm).

    The algorithm calculates a lower bound of the condition number. In this case,
    the algorithm does not return a lower bound of the condition number, but an
    inverse number (to avoid an overflow in case of a singular matrix).

    Input parameters:
        LUA -   LU decomposition of a matrix in compact form. Output of
                the RMatrixLU subroutine.
        N   -   size of matrix A.
Result: 1/LowerBound(cond(A))

    NOTE:
        if k(A) is very large, then matrix is assumed degenerate, k(A)=INF,
        0.0 is returned in such cases.
    *************************************************************************/
    public static double rmatrixlurcondinf(double[,] lua, int n)
    {
        // No norm is supplied; the internal routine estimates it from the factors.
        double rcond = 0;
        rmatrixrcondluinternal(lua, n, false, false, 0, ref rcond);
        return rcond;
    }


    /*************************************************************************
    Condition number estimate of a symmetric positive definite matrix given by
    its Cholesky decomposition.

    The routine does not return the lower bound of the condition number itself
    but its reciprocal, which avoids overflow for singular matrices.

    1-norm and inf-norm condition numbers of symmetric matrices are equal, so
    no distinction between the two norms is made. The norm of the original
    matrix is not supplied here; the internal routine is asked to estimate it
    from the factor.

    Input parameters:
        A       -   Cholesky decomposition of the matrix, output of the
                    SPDMatrixCholesky subroutine.
        N       -   size of the matrix.
        IsUpper -   True if the factor is stored in the upper triangle,
                    False if it is stored in the lower triangle.

    Result: 1/LowerBound(cond(A))

    NOTE:
        if k(A) is very large, then matrix is assumed degenerate, k(A)=INF,
        0.0 is returned in such cases.
    *************************************************************************/
    public static double spdmatrixcholeskyrcond(double[,] a, int n, bool isupper)
    {
        double rcond = 0;
        spdmatrixrcondcholeskyinternal(a, n, isupper, false, 0, ref rcond);
        return rcond;
    }


    /*************************************************************************
    Condition number estimate of a Hermitian positive definite matrix given by
    Cholesky decomposition.

    The algorithm calculates a lower bound of the condition number. In this case,
    the algorithm does not return a lower bound of the condition number, but an
    inverse number (to avoid an overflow in case of a singular matrix).
It should be noted that 1-norm and inf-norm condition numbers of Hermitian
    matrices are equal, so the algorithm doesn't take into account the
    differences between these types of norms.

    Input parameters:
        A       -   Cholesky decomposition of the matrix, output of the
                    HPDMatrixCholesky subroutine.
        N       -   size of the matrix.
        IsUpper -   True if the factor is stored in the upper triangle,
                    False if it is stored in the lower triangle.

    Result: 1/LowerBound(cond(A))

    NOTE:
        if k(A) is very large, then matrix is assumed degenerate, k(A)=INF,
        0.0 is returned in such cases.
    *************************************************************************/
    public static double hpdmatrixcholeskyrcond(complex[,] a, int n, bool isupper)
    {
        // No norm is supplied; the internal routine estimates it from the factor.
        double rcond = 0;
        hpdmatrixrcondcholeskyinternal(a, n, isupper, false, 0, ref rcond);
        return rcond;
    }


    /*************************************************************************
    Estimate of the condition number of a complex matrix given by its LU
    decomposition (1-norm).

    The routine does not return the lower bound of the condition number itself
    but its reciprocal, which avoids overflow for singular matrices. The norm
    of the original matrix is not supplied here; the internal routine is asked
    to estimate it from the factors.

    Input parameters:
        LUA -   LU decomposition of a matrix in compact form. Output of
                the CMatrixLU subroutine.
        N   -   size of matrix A, must be at least 1.

    Result: 1/LowerBound(cond(A))

    NOTE:
        if k(A) is very large, then matrix is assumed degenerate, k(A)=INF,
        0.0 is returned in such cases.
    *************************************************************************/
    public static double cmatrixlurcond1(complex[,] lua, int n)
    {
        alglib.ap.assert(n>=1, "CMatrixLURCond1: N<1!");

        double rcond = 0;
        cmatrixrcondluinternal(lua, n, true, false, 0.0, ref rcond);
        return rcond;
    }


    /*************************************************************************
    Estimate of the condition number of a matrix given by its LU decomposition
    (infinity norm).

    The algorithm calculates a lower bound of the condition number.
In this case, the algorithm does not return a lower bound of the condition
    number, but an inverse number (to avoid an overflow in case of a singular
    matrix).

    Input parameters:
        LUA -   LU decomposition of a matrix in compact form. Output of
                the CMatrixLU subroutine.
        N   -   size of matrix A.

    Result: 1/LowerBound(cond(A))

    NOTE:
        if k(A) is very large, then matrix is assumed degenerate, k(A)=INF,
        0.0 is returned in such cases.
    *************************************************************************/
    public static double cmatrixlurcondinf(complex[,] lua, int n)
    {
        alglib.ap.assert(n>=1, "CMatrixLURCondInf: N<1!");

        // No norm is supplied; the internal routine estimates it from the factors.
        double rcond = 0;
        cmatrixrcondluinternal(lua, n, false, false, 0.0, ref rcond);
        return rcond;
    }


    /*************************************************************************
    Triangular matrix: estimate of a condition number (1-norm)

    The algorithm calculates a lower bound of the condition number. In this case,
    the algorithm does not return a lower bound of the condition number, but an
    inverse number (to avoid an overflow in case of a singular matrix).

    Input parameters:
        A       -   matrix. Array[0..N-1, 0..N-1].
        N       -   size of A.
        IsUpper -   True, if the matrix is upper triangular.
        IsUnit  -   True, if the matrix has a unit diagonal.

    Result: 1/LowerBound(cond(A))

    NOTE:
        if k(A) is very large, then matrix is assumed degenerate, k(A)=INF,
        0.0 is returned in such cases.
*************************************************************************/
    public static double cmatrixtrrcond1(complex[,] a, int n, bool isupper, bool isunit)
    {
        // The message names this routine; it previously carried the name of
        // the real-valued RMatrixTRRCond1 it was copied from.
        alglib.ap.assert(n>=1, "CMatrixTRRCond1: N<1!");

        // Column sums of moduli over the referenced triangle. The diagonal
        // contributes 1 when it is implicit (unit) and |a[i,i]| otherwise;
        // entries outside the triangle are never read.
        double[] colsum = new double[n];
        for(int row=0; row<n; row++)
        {
            int first = isupper ? row+1 : 0;
            int last = isupper ? n-1 : row-1;
            for(int col=first; col<=last; col++)
            {
                colsum[col] += math.abscomplex(a[row,col]);
            }
            colsum[row] += isunit ? 1 : math.abscomplex(a[row,row]);
        }

        double norm = 0;
        foreach(double s in colsum)
        {
            norm = Math.Max(norm, s);
        }

        double rcond = 0;
        cmatrixrcondtrinternal(a, n, isupper, isunit, true, norm, ref rcond);
        return rcond;
    }


    /*************************************************************************
    Triangular matrix: estimate of a matrix condition number (infinity-norm).

    The algorithm calculates a lower bound of the condition number. In this case,
    the algorithm does not return a lower bound of the condition number, but an
    inverse number (to avoid an overflow in case of a singular matrix).

    Input parameters:
        A       -   matrix. Array whose indexes range within [0..N-1, 0..N-1].
        N       -   size of matrix A.
        IsUpper -   True, if the matrix is upper triangular.
        IsUnit  -   True, if the matrix has a unit diagonal.

    Result: 1/LowerBound(cond(A))

    NOTE:
        if k(A) is very large, then matrix is assumed degenerate, k(A)=INF,
        0.0 is returned in such cases.
*************************************************************************/
    public static double cmatrixtrrcondinf(complex[,] a, int n, bool isupper, bool isunit)
    {
        // The message names this routine; it previously carried the name of
        // the real-valued RMatrixTRRCondInf it was copied from.
        alglib.ap.assert(n>=1, "CMatrixTRRCondInf: N<1!");

        // Largest row sum of moduli over the referenced triangle; a unit
        // diagonal contributes 1 without reading a[i,i].
        double norm = 0;
        for(int row=0; row<n; row++)
        {
            int first = isupper ? row+1 : 0;
            int last = isupper ? n-1 : row-1;
            double rowsum = 0;
            for(int col=first; col<=last; col++)
            {
                rowsum += math.abscomplex(a[row,col]);
            }
            rowsum += isunit ? 1 : math.abscomplex(a[row,row]);
            norm = Math.Max(norm, rowsum);
        }

        double rcond = 0;
        cmatrixrcondtrinternal(a, n, isupper, isunit, false, norm, ref rcond);
        return rcond;
    }


    /*************************************************************************
    Threshold for rcond: matrices with condition number beyond this threshold
    are considered singular.

    Threshold must be far enough from underflow, at least Sqr(Threshold) must
    be greater than underflow. The value returned is the fourth root of
    math.minrealnumber.
    *************************************************************************/
    public static double rcondthreshold()
    {
        return Math.Sqrt(Math.Sqrt(math.minrealnumber));
    }


    /*************************************************************************
    Internal subroutine for condition number estimation

      -- LAPACK routine (version 3.0) --
         Univ. of Tennessee, Univ.
of California Berkeley, NAG Ltd.,
         Courant Institute, Argonne National Lab, and Rice University
         February 29, 1992

    Reciprocal condition number of a real triangular matrix.

    Input parameters:
        A       -   triangular matrix, only the triangle selected by IsUpper
                    (and the diagonal, unless IsUnit) is read.
        N       -   matrix size.
        IsUpper -   True for an upper triangular matrix.
        IsUnit  -   True if the diagonal is implicitly 1.
        OneNorm -   True for the 1-norm, False for the infinity-norm.
        ANorm   -   norm of A in the selected norm, supplied by the caller.

    Output parameters:
        RC      -   reciprocal condition number estimate; 0 when ANorm is
                    zero, when the safe triangular solver reports failure,
                    or when the estimate falls below RCondThreshold().
    *************************************************************************/
    private static void rmatrixrcondtrinternal(double[,] a, int n, bool isupper, bool isunit, bool onenorm, double anorm, ref double rc)
    {
        // Work arrays of the norm estimator; rmatrixestimatenorm allocates
        // them itself on its first call (KASE=0), so nothing is sized here.
        double[] ex = new double[0];
        double[] ev = new double[0];
        int[] iwork = new int[0];
        int i = 0;
        int j = 0;
        int j1 = 0;
        int j2 = 0;

        //
        // RC=0 if something happens
        //
        rc = 0;

        // KASE value for which the estimator asks for a product with inv(A);
        // the other value asks for inv(A').
        int kase1 = onenorm ? 1 : 2;

        //
        // prepare parameters for triangular solver:
        // S is the reciprocal of the largest magnitude in the triangle
        //
        double maxgrowth = 1/rcondthreshold();
        double s = 0;
        for(i=0; i<=n-1; i++)
        {
            if( isupper )
            {
                j1 = i+1;
                j2 = n-1;
            }
            else
            {
                j1 = 0;
                j2 = i-1;
            }
            for(j=j1; j<=j2; j++)
            {
                s = Math.Max(s, Math.Abs(a[i,j]));
            }
            if( isunit )
            {
                s = Math.Max(s, 1);
            }
            else
            {
                s = Math.Max(s, Math.Abs(a[i,i]));
            }
        }
        if( (double)(s)==(double)(0) )
        {
            s = 1;
        }
        s = 1/s;

        //
        // Scale according to S
        //
        anorm = anorm*s;

        //
        // Quick return if possible
        // We assume that ANORM<>0 after this block
        //
        if( (double)(anorm)==(double)(0) )
        {
            return;
        }
        if( n==1 )
        {
            rc = 1;
            return;
        }

        //
        // Estimate the norm of inv(A) by reverse communication: the
        // estimator hands back a vector in EX[1..N] and KASE tells which
        // product it wants; KASE=0 means the estimate is final.
        //
        double ainvnm = 0;
        int kase = 0;
        while( true )
        {
            rmatrixestimatenorm(n, ref ev, ref ex, ref iwork, ref ainvnm, ref kase);
            if( kase==0 )
            {
                break;
            }

            //
            // from 1-based array to 0-based
            //
            for(i=0; i<=n-1; i++)
            {
                ex[i] = ex[i+1];
            }

            //
            // multiply by inv(A) (operation 0) or inv(A') (operation 1)
            //
            int optype = kase==kase1 ? 0 : 1;
            if( !safesolve.rmatrixscaledtrsafesolve(a, s, n, ref ex, isupper, optype, isunit, maxgrowth) )
            {
                return;
            }

            //
            // from 0-based array to 1-based
            //
            for(i=n-1; i>=0; i--)
            {
                ex[i+1] = ex[i];
            }
        }

        //
        // Compute the estimate of the reciprocal condition number.
        //
        if( (double)(ainvnm)!=(double)(0) )
        {
            rc = 1/ainvnm;
            rc = rc/anorm;
            if( (double)(rc)<(double)(rcondthreshold()) )
            {
                rc = 0;
            }
        }
    }


    /*************************************************************************
    Condition number estimation

      -- LAPACK routine (version 3.0) --
         Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
         Courant Institute, Argonne National Lab, and Rice University
         March 31, 1993

    Reciprocal condition number of a complex triangular matrix.

    Input parameters:
        A       -   triangular matrix, only the triangle selected by IsUpper
                    (and the diagonal, unless IsUnit) is read.
        N       -   matrix size; for N<=0 the routine returns RC=0.
        IsUpper -   True for an upper triangular matrix.
        IsUnit  -   True if the diagonal is implicitly 1.
        OneNorm -   True for the 1-norm, False for the infinity-norm.
        ANorm   -   norm of A in the selected norm, supplied by the caller.

    Output parameters:
        RC      -   reciprocal condition number estimate; 0 when ANorm is
                    zero, when the safe triangular solver reports failure,
                    or when the estimate falls below RCondThreshold().
    *************************************************************************/
    private static void cmatrixrcondtrinternal(complex[,] a, int n, bool isupper, bool isunit, bool onenorm, double anorm, ref double rc)
    {
        // Work arrays handed to the norm estimator.
        complex[] ex = new complex[0];
        complex[] cwork4 = new complex[0];
        int[] isave = new int[0];
        double[] rsave = new double[0];
        int i = 0;
        int j = 0;
        int j1 = 0;
        int j2 = 0;

        //
        // RC=0 if something happens
        //
        rc = 0;

        //
        // init
        // (a separate N=0 case used to follow this guard but could never be
        // reached, so it has been dropped)
        //
        if( n<=0 )
        {
            return;
        }

        //
        // prepare parameters for triangular solver:
        // S is the reciprocal of the largest modulus in the triangle
        //
        double maxgrowth = 1/rcondthreshold();
        double s = 0;
        for(i=0; i<=n-1; i++)
        {
            if( isupper )
            {
                j1 = i+1;
                j2 = n-1;
            }
            else
            {
                j1 = 0;
                j2 = i-1;
            }
            for(j=j1; j<=j2; j++)
            {
                s = Math.Max(s, math.abscomplex(a[i,j]));
            }
            if( isunit )
            {
                s = Math.Max(s, 1);
            }
            else
            {
                s = Math.Max(s, math.abscomplex(a[i,i]));
            }
        }
        if( (double)(s)==(double)(0) )
        {
            s = 1;
        }
        s = 1/s;

        //
        // Scale according to S
        //
        anorm = anorm*s;

        //
        // Quick return if possible
        //
        if( (double)(anorm)==(double)(0) )
        {
            return;
        }

        //
        // Estimate the norm of inv(A) by reverse communication: the
        // estimator hands back a vector in EX[1..N] and KASE tells which
        // product it wants; KASE=0 means the estimate is final.
        //
        double ainvnm = 0;
        int kase1 = onenorm ? 1 : 2;
        int kase = 0;
        while( true )
        {
            cmatrixestimatenorm(n, ref cwork4, ref ex, ref ainvnm, ref kase, ref isave, ref rsave);
            if( kase==0 )
            {
                break;
            }

            //
            // From 1-based to 0-based
            //
            for(i=0; i<=n-1; i++)
            {
                ex[i] = ex[i+1];
            }

            //
            // multiply by inv(A) (operation 0) or inv(A') (operation 2)
            //
            int optype = kase==kase1 ? 0 : 2;
            if( !safesolve.cmatrixscaledtrsafesolve(a, s, n, ref ex, isupper, optype, isunit, maxgrowth) )
            {
                return;
            }

            //
            // from 0-based to 1-based
            //
            for(i=n-1; i>=0; i--)
            {
                ex[i+1] = ex[i];
            }
        }

        //
        // Compute the estimate of the reciprocal condition number.
        //
        if( (double)(ainvnm)!=(double)(0) )
        {
            rc = 1/ainvnm;
            rc = rc/anorm;
            if( (double)(rc)<(double)(rcondthreshold()) )
            {
                rc = 0;
            }
        }
    }


    /*************************************************************************
    Internal subroutine for condition number estimation

      -- LAPACK routine (version 3.0) --
         Univ. of Tennessee, Univ.
of California Berkeley, NAG Ltd.,
         Courant Institute, Argonne National Lab, and Rice University
         February 29, 1992

    Reciprocal condition number of a real matrix given by its Cholesky factor.

    Input parameters:
        CHA             -   Cholesky factor; the triangle selected by IsUpper
                            is read.
        N               -   matrix size, asserted to be at least 1.
        IsUpper         -   True if the factor is stored in the upper triangle.
        IsNormProvided  -   True if ANorm already holds the norm of A; when
                            False, ANorm is estimated here from the factor.
        ANorm           -   norm of A (ignored unless IsNormProvided).

    Output parameters:
        RC              -   reciprocal condition number estimate; 0 when ANorm
                            is zero, when the safe triangular solver reports
                            failure, or when the estimate falls below
                            RCondThreshold(); 1 when N=1 and ANorm<>0.
    *************************************************************************/
    private static void spdmatrixrcondcholeskyinternal(double[,] cha, int n, bool isupper, bool isnormprovided, double anorm, ref double rc)
    {
        int i = 0;
        int j = 0;
        int kase = 0;
        double ainvnm = 0;
        double[] ex = new double[0];
        double[] ev = new double[0];
        double[] tmp = new double[0];
        int[] iwork = new int[0];
        double sa = 0;
        double v = 0;
        double maxgrowth = 0;
        int i_ = 0;
        int i1_ = 0;

        rc = 0;

        alglib.ap.assert(n>=1);
        tmp = new double[n];

        //
        // RC=0 if something happens
        //
        rc = 0;

        //
        // prepare parameters for triangular solver:
        // SA becomes the reciprocal of the largest magnitude in the factor
        //
        maxgrowth = 1/rcondthreshold();
        sa = 0;
        if( isupper ) {
            for(i=0; i<=n-1; i++) {
                for(j=i; j<=n-1; j++) {
                    sa = Math.Max(sa, math.abscomplex(cha[i,j]));
                }
            }
        }
        else {
            for(i=0; i<=n-1; i++) {
                for(j=0; j<=i; j++) {
                    sa = Math.Max(sa, math.abscomplex(cha[i,j]));
                }
            }
        }
        if( (double)(sa)==(double)(0) ) {
            sa = 1;
        }
        sa = 1/sa;

        //
        // Estimate the norm of A.
        // EX is 1-based here; each product with a factor is followed by a
        // multiplication by SA, so the estimate is for the scaled matrix.
        //
        // NOTE(review): when IsNormProvided is true ANORM is used as passed,
        // without any SA rescaling, although the solves below are given SA.
        // The LU and triangular variants rescale ANORM by their factors.
        // Confirm against rmatrixscaledtrsafesolve semantics.
        //
        if( !isnormprovided ) {
            kase = 0;
            anorm = 0;
            while( true ) {
                rmatrixestimatenorm(n, ref ev, ref ex, ref iwork, ref anorm, ref kase);
                if( kase==0 ) {
                    break;
                }
                if( isupper ) {
                    //
                    // Multiply by U
                    //
                    for(i=1; i<=n; i++) {
                        i1_ = (i)-(i-1);
                        v = 0.0;
                        for(i_=i-1; i_<=n-1;i_++) {
                            v += cha[i-1,i_]*ex[i_+i1_];
                        }
                        ex[i] = v;
                    }
                    for(i_=1; i_<=n;i_++) {
                        ex[i_] = sa*ex[i_];
                    }

                    //
                    // Multiply by U'
                    //
                    for(i=0; i<=n-1; i++) {
                        tmp[i] = 0;
                    }
                    for(i=0; i<=n-1; i++) {
                        v = ex[i+1];
                        for(i_=i; i_<=n-1;i_++) {
                            tmp[i_] = tmp[i_] + v*cha[i,i_];
                        }
                    }
                    i1_ = (0) - (1);
                    for(i_=1; i_<=n;i_++) {
                        ex[i_] = tmp[i_+i1_];
                    }
                    for(i_=1; i_<=n;i_++) {
                        ex[i_] = sa*ex[i_];
                    }
                }
                else {
                    //
                    // Multiply by L'
                    //
                    for(i=0; i<=n-1; i++) {
                        tmp[i] = 0;
                    }
                    for(i=0; i<=n-1; i++) {
                        v = ex[i+1];
                        for(i_=0; i_<=i;i_++) {
                            tmp[i_] = tmp[i_] + v*cha[i,i_];
                        }
                    }
                    i1_ = (0) - (1);
                    for(i_=1; i_<=n;i_++) {
                        ex[i_] = tmp[i_+i1_];
                    }
                    for(i_=1; i_<=n;i_++) {
                        ex[i_] = sa*ex[i_];
                    }

                    //
                    // Multiply by L
                    // (rows are processed bottom-up so that entries of EX
                    // still to be read have not been overwritten)
                    //
                    for(i=n; i>=1; i--) {
                        i1_ = (1)-(0);
                        v = 0.0;
                        for(i_=0; i_<=i-1;i_++) {
                            v += cha[i-1,i_]*ex[i_+i1_];
                        }
                        ex[i] = v;
                    }
                    for(i_=1; i_<=n;i_++) {
                        ex[i_] = sa*ex[i_];
                    }
                }
            }
        }

        //
        // Quick return if possible
        //
        if( (double)(anorm)==(double)(0) ) {
            return;
        }
        if( n==1 ) {
            rc = 1;
            return;
        }

        //
        // Estimate the 1-norm of inv(A).
        // EX is shifted to 0-based for the solver and back to 1-based for
        // the estimator on every pass.
        //
        kase = 0;
        while( true ) {
            rmatrixestimatenorm(n, ref ev, ref ex, ref iwork, ref ainvnm, ref kase);
            if( kase==0 ) {
                break;
            }
            for(i=0; i<=n-1; i++) {
                ex[i] = ex[i+1];
            }
            if( isupper ) {
                //
                // Multiply by inv(U').
                //
                if( !safesolve.rmatrixscaledtrsafesolve(cha, sa, n, ref ex, isupper, 1, false, maxgrowth) ) {
                    return;
                }

                //
                // Multiply by inv(U).
                //
                if( !safesolve.rmatrixscaledtrsafesolve(cha, sa, n, ref ex, isupper, 0, false, maxgrowth) ) {
                    return;
                }
            }
            else {
                //
                // Multiply by inv(L).
                //
                if( !safesolve.rmatrixscaledtrsafesolve(cha, sa, n, ref ex, isupper, 0, false, maxgrowth) ) {
                    return;
                }

                //
                // Multiply by inv(L').
                //
                if( !safesolve.rmatrixscaledtrsafesolve(cha, sa, n, ref ex, isupper, 1, false, maxgrowth) ) {
                    return;
                }
            }
            for(i=n-1; i>=0; i--) {
                ex[i+1] = ex[i];
            }
        }

        //
        // Compute the estimate of the reciprocal condition number.
        //
        if( (double)(ainvnm)!=(double)(0) ) {
            v = 1/ainvnm;
            rc = v/anorm;
            if( (double)(rc)<(double)(rcondthreshold()) ) {
                rc = 0;
            }
        }
    }


    /*************************************************************************
    Internal subroutine for condition number estimation

      -- LAPACK routine (version 3.0) --
         Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
         Courant Institute, Argonne National Lab, and Rice University
         February 29, 1992

    Reciprocal condition number of a complex matrix given by its Cholesky
    factor. Parameters have the same meaning as in the real-valued
    SPDMatrixRCondCholeskyInternal; products with the transposed factor use
    math.conj of its entries.
    *************************************************************************/
    private static void hpdmatrixrcondcholeskyinternal(complex[,] cha, int n, bool isupper, bool isnormprovided, double anorm, ref double rc)
    {
        int[] isave = new int[0];
        double[] rsave = new double[0];
        complex[] ex = new complex[0];
        complex[] ev = new complex[0];
        complex[] tmp = new complex[0];
        int kase = 0;
        double ainvnm = 0;
        complex v = 0;
        int i = 0;
        int j = 0;
        double sa = 0;
        double maxgrowth = 0;
        int i_ = 0;
        int i1_ = 0;

        rc = 0;

        alglib.ap.assert(n>=1);
        tmp = new complex[n];

        //
        // RC=0 if something happens
        //
        rc = 0;

        //
        // prepare parameters for triangular solver:
        // SA becomes the reciprocal of the largest modulus in the factor
        //
        maxgrowth = 1/rcondthreshold();
        sa = 0;
        if( isupper ) {
            for(i=0; i<=n-1; i++) {
                for(j=i; j<=n-1; j++) {
                    sa = Math.Max(sa, math.abscomplex(cha[i,j]));
                }
            }
        }
        else {
            for(i=0; i<=n-1; i++) {
                for(j=0; j<=i; j++) {
                    sa = Math.Max(sa, math.abscomplex(cha[i,j]));
                }
            }
        }
        if( (double)(sa)==(double)(0) ) {
            sa = 1;
        }
        sa = 1/sa;

        //
        // Estimate the norm of A
        //
        // NOTE(review): when IsNormProvided is true ANORM is used as passed,
        // without any SA rescaling, although the solves below are given SA.
        // Confirm against cmatrixscaledtrsafesolve semantics.
        //
        if( !isnormprovided ) {
            anorm = 0;
            kase = 0;
            while( true ) {
                cmatrixestimatenorm(n, ref ev, ref ex, ref anorm, ref kase, ref isave, ref rsave);
                if( kase==0 ) {
                    break;
                }
                if( isupper ) {
                    //
                    // Multiply by U
                    //
                    for(i=1; i<=n; i++) {
                        i1_ = (i)-(i-1);
                        v = 0.0;
                        for(i_=i-1; i_<=n-1;i_++) {
                            v += cha[i-1,i_]*ex[i_+i1_];
                        }
                        ex[i] = v;
                    }
                    for(i_=1; i_<=n;i_++) {
                        ex[i_] = sa*ex[i_];
                    }

                    //
                    // Multiply by U'
                    //
                    for(i=0; i<=n-1; i++) {
                        tmp[i] = 0;
                    }
                    for(i=0; i<=n-1; i++) {
                        v = ex[i+1];
                        for(i_=i; i_<=n-1;i_++) {
                            tmp[i_] = tmp[i_] + v*math.conj(cha[i,i_]);
                        }
                    }
                    i1_ = (0) - (1);
                    for(i_=1; i_<=n;i_++) {
                        ex[i_] = tmp[i_+i1_];
                    }
                    for(i_=1; i_<=n;i_++) {
                        ex[i_] = sa*ex[i_];
                    }
                }
                else {
                    //
                    // Multiply by L'
                    //
                    for(i=0; i<=n-1; i++) {
                        tmp[i] = 0;
                    }
                    for(i=0; i<=n-1; i++) {
                        v = ex[i+1];
                        for(i_=0; i_<=i;i_++) {
                            tmp[i_] = tmp[i_] + v*math.conj(cha[i,i_]);
                        }
                    }
                    i1_ = (0) - (1);
                    for(i_=1; i_<=n;i_++) {
                        ex[i_] = tmp[i_+i1_];
                    }
                    for(i_=1; i_<=n;i_++) {
                        ex[i_] = sa*ex[i_];
                    }

                    //
                    // Multiply by L
                    //
                    for(i=n; i>=1; i--) {
                        i1_ = (1)-(0);
                        v = 0.0;
                        for(i_=0; i_<=i-1;i_++) {
                            v += cha[i-1,i_]*ex[i_+i1_];
                        }
                        ex[i] = v;
                    }
                    for(i_=1; i_<=n;i_++) {
                        ex[i_] = sa*ex[i_];
                    }
                }
            }
        }

        //
        // Quick return if possible
        // After this block we assume that ANORM<>0
        //
        if( (double)(anorm)==(double)(0) ) {
            return;
        }
        if( n==1 ) {
            rc = 1;
            return;
        }

        //
        // Estimate the norm of inv(A).
        // EX is shifted to 0-based for the solver and back to 1-based for
        // the estimator on every pass.
        //
        ainvnm = 0;
        kase = 0;
        while( true ) {
            cmatrixestimatenorm(n, ref ev, ref ex, ref ainvnm, ref kase, ref isave, ref rsave);
            if( kase==0 ) {
                break;
            }
            for(i=0; i<=n-1; i++) {
                ex[i] = ex[i+1];
            }
            if( isupper ) {
                //
                // Multiply by inv(U').
                //
                if( !safesolve.cmatrixscaledtrsafesolve(cha, sa, n, ref ex, isupper, 2, false, maxgrowth) ) {
                    return;
                }

                //
                // Multiply by inv(U).
                //
                if( !safesolve.cmatrixscaledtrsafesolve(cha, sa, n, ref ex, isupper, 0, false, maxgrowth) ) {
                    return;
                }
            }
            else {
                //
                // Multiply by inv(L).
                //
                if( !safesolve.cmatrixscaledtrsafesolve(cha, sa, n, ref ex, isupper, 0, false, maxgrowth) ) {
                    return;
                }

                //
                // Multiply by inv(L').
                //
                if( !safesolve.cmatrixscaledtrsafesolve(cha, sa, n, ref ex, isupper, 2, false, maxgrowth) ) {
                    return;
                }
            }
            for(i=n-1; i>=0; i--) {
                ex[i+1] = ex[i];
            }
        }

        //
        // Compute the estimate of the reciprocal condition number.
        //
        if( (double)(ainvnm)!=(double)(0) ) {
            rc = 1/ainvnm;
            rc = rc/anorm;
            if( (double)(rc)<(double)(rcondthreshold()) ) {
                rc = 0;
            }
        }
    }


    /*************************************************************************
    Internal subroutine for condition number estimation

      -- LAPACK routine (version 3.0) --
         Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
         Courant Institute, Argonne National Lab, and Rice University
         February 29, 1992

    Reciprocal condition number of a real matrix given by its compact LU
    factors (unit lower triangle L below the diagonal, U on and above it).

    Input parameters:
        LUA             -   LU factors in compact form.
        N               -   matrix size.
        OneNorm         -   True for the 1-norm, False for the infinity-norm.
        IsANormProvided -   True if ANorm already holds the norm of A; when
                            False, ANorm is estimated here from the factors.
        ANorm           -   norm of A (ignored unless IsANormProvided).

    Output parameters:
        RC              -   reciprocal condition number estimate; 0 when ANorm
                            is zero, when the safe triangular solver reports
                            failure, or when the estimate falls below
                            RCondThreshold(); 1 when N=1 and ANorm<>0.
    *************************************************************************/
    private static void rmatrixrcondluinternal(double[,] lua, int n, bool onenorm, bool isanormprovided, double anorm, ref double rc)
    {
        double[] ex = new double[0];
        double[] ev = new double[0];
        int[] iwork = new int[0];
        double[] tmp = new double[0];
        double v = 0;
        int i = 0;
        int j = 0;
        int kase = 0;
        int kase1 = 0;
        double ainvnm = 0;
        double maxgrowth = 0;
        double su = 0;
        double sl = 0;
        bool mupper = new bool();
        bool munit = new bool();
        int i_ = 0;
        int i1_ = 0;

        rc = 0;

        //
        // RC=0 if something happens
        //
        rc = 0;

        //
        // init
        //
        if( onenorm ) {
            kase1 = 1;
        }
        else {
            kase1 = 2;
        }
        mupper = true;
        munit = true;
        iwork = new int[n+1];
        tmp = new double[n];

        //
        // prepare parameters for triangular solver:
        // SL/SU become reciprocals of the largest magnitudes in L and U;
        // SL starts from 1 because the unit diagonal of L is implicit
        //
        maxgrowth = 1/rcondthreshold();
        su = 0;
        sl = 1;
        for(i=0; i<=n-1; i++) {
            for(j=0; j<=i-1; j++) {
                sl = Math.Max(sl, Math.Abs(lua[i,j]));
            }
            for(j=i; j<=n-1; j++) {
                su = Math.Max(su, Math.Abs(lua[i,j]));
            }
        }
        if( (double)(su)==(double)(0) ) {
            su = 1;
        }
        su = 1/su;
        sl = 1/sl;

        //
        // Estimate the norm of A.
        // The products below use the unscaled factors; scaling by SU*SL is
        // applied to ANORM afterwards.
        //
        if( !isanormprovided ) {
            kase = 0;
            anorm = 0;
            while( true ) {
                rmatrixestimatenorm(n, ref ev, ref ex, ref iwork, ref anorm, ref kase);
                if( kase==0 ) {
                    break;
                }
                if( kase==kase1 ) {
                    //
                    // Multiply by U
                    //
                    for(i=1; i<=n; i++) {
                        i1_ = (i)-(i-1);
                        v = 0.0;
                        for(i_=i-1; i_<=n-1;i_++) {
                            v += lua[i-1,i_]*ex[i_+i1_];
                        }
                        ex[i] = v;
                    }

                    //
                    // Multiply by L
                    // (strictly lower part only; the unit diagonal is added
                    // through EX[i] itself)
                    //
                    for(i=n; i>=1; i--) {
                        if( i>1 ) {
                            i1_ = (1)-(0);
                            v = 0.0;
                            for(i_=0; i_<=i-2;i_++) {
                                v += lua[i-1,i_]*ex[i_+i1_];
                            }
                        }
                        else {
                            v = 0;
                        }
                        ex[i] = ex[i]+v;
                    }
                }
                else {
                    //
                    // Multiply by L'
                    //
                    for(i=0; i<=n-1; i++) {
                        tmp[i] = 0;
                    }
                    for(i=0; i<=n-1; i++) {
                        v = ex[i+1];
                        if( i>=1 ) {
                            for(i_=0; i_<=i-1;i_++) {
                                tmp[i_] = tmp[i_] + v*lua[i,i_];
                            }
                        }
                        tmp[i] = tmp[i]+v;
                    }
                    i1_ = (0) - (1);
                    for(i_=1; i_<=n;i_++) {
                        ex[i_] = tmp[i_+i1_];
                    }

                    //
                    // Multiply by U'
                    //
                    for(i=0; i<=n-1; i++) {
                        tmp[i] = 0;
                    }
                    for(i=0; i<=n-1; i++) {
                        v = ex[i+1];
                        for(i_=i; i_<=n-1;i_++) {
                            tmp[i_] = tmp[i_] + v*lua[i,i_];
                        }
                    }
                    i1_ = (0) - (1);
                    for(i_=1; i_<=n;i_++) {
                        ex[i_] = tmp[i_+i1_];
                    }
                }
            }
        }

        //
        // Scale according to SU/SL
        //
        anorm = anorm*su*sl;

        //
        // Quick return if possible
        // We assume that ANORM<>0 after this block
        //
        if( (double)(anorm)==(double)(0) ) {
            return;
        }
        if( n==1 ) {
            rc = 1;
            return;
        }

        //
        // Estimate the norm of inv(A).
        //
        ainvnm = 0;
        kase = 0;
        while( true ) {
            rmatrixestimatenorm(n, ref ev, ref ex, ref iwork, ref ainvnm, ref kase);
            if( kase==0 ) {
                break;
            }

            //
            // from 1-based array to 0-based
            //
            for(i=0; i<=n-1; i++) {
                ex[i] = ex[i+1];
            }

            //
            // multiply by inv(A) or inv(A')
            //
            if( kase==kase1 ) {
                //
                // Multiply by inv(L).
                //
                if( !safesolve.rmatrixscaledtrsafesolve(lua, sl, n, ref ex, !mupper, 0, munit, maxgrowth) ) {
                    return;
                }

                //
                // Multiply by inv(U).
                //
                if( !safesolve.rmatrixscaledtrsafesolve(lua, su, n, ref ex, mupper, 0, !munit, maxgrowth) ) {
                    return;
                }
            }
            else {
                //
                // Multiply by inv(U').
                //
                if( !safesolve.rmatrixscaledtrsafesolve(lua, su, n, ref ex, mupper, 1, !munit, maxgrowth) ) {
                    return;
                }

                //
                // Multiply by inv(L').
                //
                if( !safesolve.rmatrixscaledtrsafesolve(lua, sl, n, ref ex, !mupper, 1, munit, maxgrowth) ) {
                    return;
                }
            }

            //
            // from 0-based array to 1-based
            //
            for(i=n-1; i>=0; i--) {
                ex[i+1] = ex[i];
            }
        }

        //
        // Compute the estimate of the reciprocal condition number.
        //
        if( (double)(ainvnm)!=(double)(0) ) {
            rc = 1/ainvnm;
            rc = rc/anorm;
            if( (double)(rc)<(double)(rcondthreshold()) ) {
                rc = 0;
            }
        }
    }


    /*************************************************************************
    Condition number estimation

      -- LAPACK routine (version 3.0) --
         Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
         Courant Institute, Argonne National Lab, and Rice University
         March 31, 1993

    Reciprocal condition number of a complex matrix given by its compact LU
    factors. Parameters have the same meaning as in the real-valued
    RMatrixRCondLUInternal; for N<=0 the routine returns RC=0.
    *************************************************************************/
    private static void cmatrixrcondluinternal(complex[,] lua, int n, bool onenorm, bool isanormprovided, double anorm, ref double rc)
    {
        complex[] ex = new complex[0];
        complex[] cwork2 = new complex[0];
        complex[] cwork3 = new complex[0];
        complex[] cwork4 = new complex[0];
        int[] isave = new int[0];
        double[] rsave = new double[0];
        int kase = 0;
        int kase1 = 0;
        double ainvnm = 0;
        complex v = 0;
        int i = 0;
        int j = 0;
        double su = 0;
        double sl = 0;
        double maxgrowth = 0;
        int i_ = 0;
        int i1_ = 0;

        rc = 0;

        if( n<=0 ) {
            return;
        }
        cwork2 = new complex[n+1];
        rc = 0;
        // NOTE(review): this branch cannot be reached, N<=0 has already
        // returned above.
        if( n==0 ) {
            rc = 1;
            return;
        }

        //
        // prepare parameters for triangular solver:
        // SL/SU become reciprocals of the largest moduli in L and U;
        // SL starts from 1 because the unit diagonal of L is implicit
        //
        maxgrowth = 1/rcondthreshold();
        su = 0;
        sl = 1;
        for(i=0; i<=n-1; i++) {
            for(j=0; j<=i-1; j++) {
                sl = Math.Max(sl, math.abscomplex(lua[i,j]));
            }
            for(j=i; j<=n-1; j++) {
                su = Math.Max(su, math.abscomplex(lua[i,j]));
            }
        }
        if( (double)(su)==(double)(0) ) {
            su = 1;
        }
        su = 1/su;
        sl = 1/sl;

        //
        // Estimate the norm of SU*SL*A.
        // (the products below use the unscaled factors; the SU*SL scaling
        // is applied to ANORM after this block)
        //
        if( !isanormprovided ) {
            anorm = 0;
            if( onenorm ) {
                kase1 = 1;
            }
            else {
                kase1 = 2;
            }
            kase = 0;
            do {
                cmatrixestimatenorm(n, ref cwork4, ref ex, ref anorm, ref kase, ref isave, ref rsave);
                if( kase!=0 ) {
                    if( kase==kase1 ) {
                        //
                        // Multiply by U
                        //
                        for(i=1; i<=n; i++) {
                            i1_ = (i)-(i-1);
                            v = 0.0;
                            for(i_=i-1; i_<=n-1;i_++) {
                                v += lua[i-1,i_]*ex[i_+i1_];
                            }
                            ex[i] = v;
                        }

                        //
                        // Multiply by L
                        //
                        for(i=n; i>=1; i--) {
                            v = 0;
                            if( i>1 ) {
                                i1_ = (1)-(0);
                                v = 0.0;
                                for(i_=0; i_<=i-2;i_++) {
                                    v += lua[i-1,i_]*ex[i_+i1_];
                                }
                            }
                            ex[i] = v+ex[i];
                        }
                    }
                    else {
                        //
                        // Multiply by L'
                        // (CWORK2 is a 1-based scratch vector)
                        //
                        for(i=1; i<=n; i++) {
                            cwork2[i] = 0;
                        }
                        for(i=1; i<=n; i++) {
                            v = ex[i];
                            if( i>1 ) {
                                i1_ = (0) - (1);
                                for(i_=1; i_<=i-1;i_++) {
                                    cwork2[i_] = cwork2[i_] + v*math.conj(lua[i-1,i_+i1_]);
                                }
                            }
                            cwork2[i] = cwork2[i]+v;
                        }

                        //
                        // Multiply by U'
                        //
                        for(i=1; i<=n; i++) {
                            ex[i] = 0;
                        }
                        for(i=1; i<=n; i++) {
                            v = cwork2[i];
                            i1_ = (i-1) - (i);
                            for(i_=i; i_<=n;i_++) {
                                ex[i_] = ex[i_] + v*math.conj(lua[i-1,i_+i1_]);
                            }
                        }
                    }
                }
            }
            while( kase!=0 );
        }

        //
        // Scale according to SU/SL
        //
        anorm = anorm*su*sl;

        //
        // Quick return if possible
        //
        if( (double)(anorm)==(double)(0) ) {
            return;
        }

        //
        // Estimate the norm of inv(A).
        //
        ainvnm = 0;
        if( onenorm ) {
            kase1 = 1;
        }
        else {
            kase1 = 2;
        }
        kase = 0;
        while( true ) {
            cmatrixestimatenorm(n, ref cwork4, ref ex, ref ainvnm, ref kase, ref isave, ref rsave);
            if( kase==0 ) {
                break;
            }

            //
            // From 1-based to 0-based
            //
            for(i=0; i<=n-1; i++) {
                ex[i] = ex[i+1];
            }

            //
            // multiply by inv(A) or inv(A')
            //
            if( kase==kase1 ) {
                //
                // Multiply by inv(L).
                //
                if( !safesolve.cmatrixscaledtrsafesolve(lua, sl, n, ref ex, false, 0, true, maxgrowth) ) {
                    rc = 0;
                    return;
                }

                //
                // Multiply by inv(U).
                //
                if( !safesolve.cmatrixscaledtrsafesolve(lua, su, n, ref ex, true, 0, false, maxgrowth) ) {
                    rc = 0;
                    return;
                }
            }
            else {
                //
                // Multiply by inv(U').
                //
                if( !safesolve.cmatrixscaledtrsafesolve(lua, su, n, ref ex, true, 2, false, maxgrowth) ) {
                    rc = 0;
                    return;
                }

                //
                // Multiply by inv(L').
                //
                if( !safesolve.cmatrixscaledtrsafesolve(lua, sl, n, ref ex, false, 2, true, maxgrowth) ) {
                    rc = 0;
                    return;
                }
            }

            //
            // from 0-based to 1-based
            //
            for(i=n-1; i>=0; i--) {
                ex[i+1] = ex[i];
            }
        }

        //
        // Compute the estimate of the reciprocal condition number.
        //
        if( (double)(ainvnm)!=(double)(0) ) {
            rc = 1/ainvnm;
            rc = rc/anorm;
            if( (double)(rc)<(double)(rcondthreshold()) ) {
                rc = 0;
            }
        }
    }


    /*************************************************************************
    Internal subroutine for matrix norm estimation

      -- LAPACK auxiliary routine (version 3.0) --
         Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
         Courant Institute, Argonne National Lab, and Rice University
         February 29, 1992
    *************************************************************************/
    private static void rmatrixestimatenorm(int n, ref double[] v, ref double[] x, ref int[] isgn, ref double est, ref int kase)
    {
        int itmax = 0;
        int i = 0;
        double t = 0;
        bool flg = new bool();
        int positer = 0;
        int posj = 0;
        int posjlast = 0;
        int posjump = 0;
        int posaltsgn = 0;
        int posestold = 0;
        int postemp = 0;
        int i_ = 0;

        itmax = 5;
        posaltsgn = n+1;
        posestold = n+2;
        postemp = n+3;
        positer = n+1;
        posj = n+2;
        posjlast = n+3;
        posjump = n+4;
        if( kase==0 ) {
            v = new double[n+4];
            x = new double[n+1];
            isgn = new int[n+5];
            t = (double)1/(double)n;
            for(i=1; i<=n; i++) {
                x[i] = t;
            }
            kase = 1;
            isgn[posjump] = 1;
            return;
        }

        //
        // ................ ENTRY (JUMP = 1)
        // FIRST ITERATION. X HAS BEEN OVERWRITTEN BY A*X.
        //
        if( isgn[posjump]==1 ) {
            if( n==1 ) {
                v[1] = x[1];
                est = Math.Abs(v[1]);
                kase = 0;
                return;
            }
            est = 0;
            for(i=1; i<=n; i++) {
                est = est+Math.Abs(x[i]);
            }
            for(i=1; i<=n; i++) {
                if( (double)(x[i])>=(double)(0) ) {
                    x[i] = 1;
                }
                else {
                    x[i] = -1;
                }
                isgn[i] = Math.Sign(x[i]);
            }
            kase = 2;
            isgn[posjump] = 2;
            return;
        }

        //
        // ................ ENTRY (JUMP = 2)
        // FIRST ITERATION. X HAS BEEN OVERWRITTEN BY TRANDPOSE(A)*X.
// if( isgn[posjump]==2 ) { isgn[posj] = 1; for(i=2; i<=n; i++) { if( (double)(Math.Abs(x[i]))>(double)(Math.Abs(x[isgn[posj]])) ) { isgn[posj] = i; } } isgn[positer] = 2; // // MAIN LOOP - ITERATIONS 2,3,...,ITMAX. // for(i=1; i<=n; i++) { x[i] = 0; } x[isgn[posj]] = 1; kase = 1; isgn[posjump] = 3; return; } // // ................ ENTRY (JUMP = 3) // X HAS BEEN OVERWRITTEN BY A*X. // if( isgn[posjump]==3 ) { for(i_=1; i_<=n;i_++) { v[i_] = x[i_]; } v[posestold] = est; est = 0; for(i=1; i<=n; i++) { est = est+Math.Abs(v[i]); } flg = false; for(i=1; i<=n; i++) { if( ((double)(x[i])>=(double)(0) && isgn[i]<0) || ((double)(x[i])<(double)(0) && isgn[i]>=0) ) { flg = true; } } // // REPEATED SIGN VECTOR DETECTED, HENCE ALGORITHM HAS CONVERGED. // OR MAY BE CYCLING. // if( !flg || (double)(est)<=(double)(v[posestold]) ) { v[posaltsgn] = 1; for(i=1; i<=n; i++) { x[i] = v[posaltsgn]*(1+(double)(i-1)/(double)(n-1)); v[posaltsgn] = -v[posaltsgn]; } kase = 1; isgn[posjump] = 5; return; } for(i=1; i<=n; i++) { if( (double)(x[i])>=(double)(0) ) { x[i] = 1; isgn[i] = 1; } else { x[i] = -1; isgn[i] = -1; } } kase = 2; isgn[posjump] = 4; return; } // // ................ ENTRY (JUMP = 4) // X HAS BEEN OVERWRITTEN BY TRANDPOSE(A)*X. // if( isgn[posjump]==4 ) { isgn[posjlast] = isgn[posj]; isgn[posj] = 1; for(i=2; i<=n; i++) { if( (double)(Math.Abs(x[i]))>(double)(Math.Abs(x[isgn[posj]])) ) { isgn[posj] = i; } } if( (double)(x[isgn[posjlast]])!=(double)(Math.Abs(x[isgn[posj]])) && isgn[positer](double)(est) ) { for(i_=1; i_<=n;i_++) { v[i_] = x[i_]; } est = v[postemp]; } kase = 0; return; } } private static void cmatrixestimatenorm(int n, ref complex[] v, ref complex[] x, ref double est, ref int kase, ref int[] isave, ref double[] rsave) { int itmax = 0; int i = 0; int iter = 0; int j = 0; int jlast = 0; int jump = 0; double absxi = 0; double altsgn = 0; double estold = 0; double safmin = 0; double temp = 0; int i_ = 0; // //Executable Statements .. 
// itmax = 5; safmin = math.minrealnumber; if( kase==0 ) { v = new complex[n+1]; x = new complex[n+1]; isave = new int[5]; rsave = new double[4]; for(i=1; i<=n; i++) { x[i] = (double)1/(double)n; } kase = 1; jump = 1; internalcomplexrcondsaveall(ref isave, ref rsave, ref i, ref iter, ref j, ref jlast, ref jump, ref absxi, ref altsgn, ref estold, ref temp); return; } internalcomplexrcondloadall(ref isave, ref rsave, ref i, ref iter, ref j, ref jlast, ref jump, ref absxi, ref altsgn, ref estold, ref temp); // // ENTRY (JUMP = 1) // FIRST ITERATION. X HAS BEEN OVERWRITTEN BY A*X. // if( jump==1 ) { if( n==1 ) { v[1] = x[1]; est = math.abscomplex(v[1]); kase = 0; internalcomplexrcondsaveall(ref isave, ref rsave, ref i, ref iter, ref j, ref jlast, ref jump, ref absxi, ref altsgn, ref estold, ref temp); return; } est = internalcomplexrcondscsum1(x, n); for(i=1; i<=n; i++) { absxi = math.abscomplex(x[i]); if( (double)(absxi)>(double)(safmin) ) { x[i] = x[i]/absxi; } else { x[i] = 1; } } kase = 2; jump = 2; internalcomplexrcondsaveall(ref isave, ref rsave, ref i, ref iter, ref j, ref jlast, ref jump, ref absxi, ref altsgn, ref estold, ref temp); return; } // // ENTRY (JUMP = 2) // FIRST ITERATION. X HAS BEEN OVERWRITTEN BY CTRANS(A)*X. // if( jump==2 ) { j = internalcomplexrcondicmax1(x, n); iter = 2; // // MAIN LOOP - ITERATIONS 2,3,...,ITMAX. // for(i=1; i<=n; i++) { x[i] = 0; } x[j] = 1; kase = 1; jump = 3; internalcomplexrcondsaveall(ref isave, ref rsave, ref i, ref iter, ref j, ref jlast, ref jump, ref absxi, ref altsgn, ref estold, ref temp); return; } // // ENTRY (JUMP = 3) // X HAS BEEN OVERWRITTEN BY A*X. // if( jump==3 ) { for(i_=1; i_<=n;i_++) { v[i_] = x[i_]; } estold = est; est = internalcomplexrcondscsum1(v, n); // // TEST FOR CYCLING. // if( (double)(est)<=(double)(estold) ) { // // ITERATION COMPLETE. FINAL STAGE. 
//
                    altsgn = 1;
                    for(i=1; i<=n; i++)
                    {
                        x[i] = altsgn*(1+(double)(i-1)/(double)(n-1));
                        altsgn = -altsgn;
                    }
                    kase = 1;
                    jump = 5;
                    internalcomplexrcondsaveall(ref isave, ref rsave, ref i, ref iter, ref j, ref jlast, ref jump, ref absxi, ref altsgn, ref estold, ref temp);
                    return;
                }
                for(i=1; i<=n; i++)
                {
                    absxi = math.abscomplex(x[i]);
                    if( (double)(absxi)>(double)(safmin) )
                    {
                        x[i] = x[i]/absxi;
                    }
                    else
                    {
                        x[i] = 1;
                    }
                }
                kase = 2;
                jump = 4;
                internalcomplexrcondsaveall(ref isave, ref rsave, ref i, ref iter, ref j, ref jlast, ref jump, ref absxi, ref altsgn, ref estold, ref temp);
                return;
            }

            //
            // ENTRY (JUMP = 4)
            // X HAS BEEN OVERWRITTEN BY CTRANS(A)*X.
            //
            if( jump==4 )
            {
                jlast = j;
                j = internalcomplexrcondicmax1(x, n);

                //
                // NOTE(review): the condition below is damaged in this copy of
                // the file - the text between "iter" and "(double)(est)" is
                // missing (it looks like everything between a '<' and the next
                // '>' was dropped), so the statement does not parse. Restore it
                // from the upstream ALGLIB 3.9.0 sources; do not guess.
                //
                if( (double)(math.abscomplex(x[jlast]))!=(double)(math.abscomplex(x[j])) && iter(double)(est) )
                {
                    for(i_=1; i_<=n;i_++)
                    {
                        v[i_] = x[i_];
                    }
                    est = temp;
                }
                kase = 0;
                internalcomplexrcondsaveall(ref isave, ref rsave, ref i, ref iter, ref j, ref jlast, ref jump, ref absxi, ref altsgn, ref estold, ref temp);
                return;
            }
        }


        /*************************************************************************
        Returns the sum of the moduli of X[1..N] (1-based; X[0] is not read).
        For N<1 the loop does not execute and the result is 0.
        *************************************************************************/
        private static double internalcomplexrcondscsum1(complex[] x,
            int n)
        {
            double result = 0;
            int i = 0;

            result = 0;
            for(i=1; i<=n; i++)
            {
                result = result+math.abscomplex(x[i]);
            }
            return result;
        }


        /*************************************************************************
        Returns the 1-based index of the element of X[1..N] with the largest
        modulus. The comparison is strict, so the first such element wins on
        ties. X[1] is read unconditionally, so X must have at least two entries.
        *************************************************************************/
        private static int internalcomplexrcondicmax1(complex[] x,
            int n)
        {
            int result = 0;
            int i = 0;
            double m = 0;

            result = 1;
            m = math.abscomplex(x[1]);
            for(i=2; i<=n; i++)
            {
                if( (double)(math.abscomplex(x[i]))>(double)(m) )
                {
                    result = i;
                    m = math.abscomplex(x[i]);
                }
            }
            return result;
        }


        /*************************************************************************
        Packs the estimator's local state into the caller-owned arrays so that it
        survives between reverse-communication calls:
            ISave[0..4] = I, Iter, J, JLast, Jump
            RSave[0..3] = AbsXI, AltSgn, EstOld, Temp
        The arrays must already be allocated (cmatrixestimatenorm() allocates
        them as int[5] and double[4] on its first call). Only the arrays are
        written; the scalar arguments are read.
        *************************************************************************/
        private static void internalcomplexrcondsaveall(ref int[] isave,
            ref double[] rsave,
            ref int i,
            ref int iter,
            ref int j,
            ref int jlast,
            ref int jump,
            ref double absxi,
            ref double altsgn,
            ref double estold,
            ref double temp)
        {
            isave[0] = i;
            isave[1] = iter;
            isave[2] = j;
            isave[3] = jlast;
            isave[4] = jump;
            rsave[0] = absxi;
            rsave[1] = altsgn;
            rsave[2] = estold;
            rsave[3] = temp;
        }


        /*************************************************************************
        Inverse of internalcomplexrcondsaveall(): restores the estimator's local
        state from ISave[0..4] and RSave[0..3], using the same slot layout.
        *************************************************************************/
        private static void internalcomplexrcondloadall(ref int[] isave,
            ref double[] rsave,
            ref int i,
            ref int iter,
            ref int j,
            ref int jlast,
            ref int jump,
            ref double absxi,
            ref double altsgn,
            ref double estold,
            ref double temp)
        {
            i = isave[0];
            iter = isave[1];
            j = isave[2];
            jlast = isave[3];
            jump = isave[4];
            absxi = rsave[0];
            altsgn = rsave[1];
            estold = rsave[2];
            temp = rsave[3];
        }


    }
    public class matinv
    {
        /*************************************************************************
        Matrix inverse report:
        * R1    reciprocal of condition number in 1-norm
        * RInf  reciprocal of condition number in inf-norm
        *************************************************************************/
        public class matinvreport : apobject
        {
            public double r1;
            public double rinf;
            public matinvreport()
            {
                init();
            }
            // Nothing to initialise: both fields keep their default values.
            public override void init()
            {
            }
            // Returns a new report carrying the same R1 and RInf values.
            public override alglib.apobject make_copy()
            {
                matinvreport _result = new matinvreport();
                _result.r1 = r1;
                _result.rinf = rinf;
                return _result;
            }
        };




        public const int parallelsize = 64;


        /*************************************************************************
        Inversion of a matrix given by its LU decomposition.

        COMMERCIAL EDITION OF ALGLIB:

          ! Commercial version of ALGLIB includes two important improvements of
          ! this function, which can be used from C++ and C#:
          ! * Intel MKL support (lightweight Intel MKL is shipped with ALGLIB)
          ! * multicore support
          !
          ! Intel MKL gives approximately constant (with respect to number of
          ! worker threads) acceleration factor which depends on CPU being used,
          ! problem size and "baseline" ALGLIB edition which is used for
          ! comparison.
          !
          ! Say, on SSE2-capable CPU with N=1024, HPC ALGLIB will be:
          ! * about 2-3x faster than ALGLIB for C++ without MKL
          ! * about 7-10x faster than "pure C#" edition of ALGLIB
          ! Difference in performance will be more striking on newer CPU's with
          ! support for newer SIMD instructions. Generally, MKL accelerates any
          ! problem whose size is at least 128, with best efficiency achieved for
          ! N's larger than 512.
          !
          ! Commercial edition of ALGLIB also supports multithreaded acceleration
          ! of this function.
We should note that matrix inversion is harder to ! parallelize than, say, matrix-matrix product - this algorithm has ! many internal synchronization points which can not be avoided. However ! parallelism starts to be profitable starting from N=1024, achieving ! near-linear speedup for N=4096 or higher. ! ! In order to use multicore features you have to: ! * use commercial version of ALGLIB ! * call this function with "smp_" prefix, which indicates that ! multicore code will be used (for multicore support) ! ! We recommend you to read 'Working with commercial version' section of ! ALGLIB Reference Manual in order to find out how to use performance- ! related features provided by commercial edition of ALGLIB. INPUT PARAMETERS: A - LU decomposition of the matrix (output of RMatrixLU subroutine). Pivots - table of permutations (the output of RMatrixLU subroutine). N - size of matrix A (optional) : * if given, only principal NxN submatrix is processed and overwritten. other elements are unchanged. * if not given, size is automatically determined from matrix size (A must be square matrix) OUTPUT PARAMETERS: Info - return code: * -3 A is singular, or VERY close to singular. it is filled by zeros in such cases. * 1 task is solved (but matrix A may be ill-conditioned, check R1/RInf parameters for condition numbers). Rep - solver report, see below for more info A - inverse of matrix A. Array whose indexes range within [0..N-1, 0..N-1]. SOLVER REPORT Subroutine sets following fields of the Rep structure: * R1 reciprocal of condition number: 1/cond(A), 1-norm. * RInf reciprocal of condition number: 1/cond(A), inf-norm. 
-- ALGLIB routine -- 05.02.2010 Bochkanov Sergey *************************************************************************/ public static void rmatrixluinverse(ref double[,] a, int[] pivots, int n, ref int info, matinvreport rep) { double[] work = new double[0]; int i = 0; int j = 0; int k = 0; double v = 0; apserv.sinteger sinfo = new apserv.sinteger(); info = 0; alglib.ap.assert(n>0, "RMatrixLUInverse: N<=0!"); alglib.ap.assert(alglib.ap.cols(a)>=n, "RMatrixLUInverse: cols(A)=n, "RMatrixLUInverse: rows(A)=n, "RMatrixLUInverse: len(Pivots)n-1 || pivots[i]0, "RMatrixLUInverse: incorrect Pivots array!"); // // calculate condition numbers // rep.r1 = rcond.rmatrixlurcond1(a, n); rep.rinf = rcond.rmatrixlurcondinf(a, n); if( (double)(rep.r1)<(double)(rcond.rcondthreshold()) || (double)(rep.rinf)<(double)(rcond.rcondthreshold()) ) { for(i=0; i<=n-1; i++) { for(j=0; j<=n-1; j++) { a[i,j] = 0; } } rep.r1 = 0; rep.rinf = 0; info = -3; return; } // // Call cache-oblivious code // work = new double[n]; sinfo.val = 1; rmatrixluinverserec(ref a, 0, n, ref work, sinfo, rep); info = sinfo.val; // // apply permutations // for(i=0; i<=n-1; i++) { for(j=n-2; j>=0; j--) { k = pivots[j]; v = a[i,j]; a[i,j] = a[i,k]; a[i,k] = v; } } } /************************************************************************* Single-threaded stub. HPC ALGLIB replaces it by multithreaded code. *************************************************************************/ public static void _pexec_rmatrixluinverse(ref double[,] a, int[] pivots, int n, ref int info, matinvreport rep) { rmatrixluinverse(ref a,pivots,n,ref info,rep); } /************************************************************************* Inversion of a general matrix. COMMERCIAL EDITION OF ALGLIB: ! Commercial version of ALGLIB includes two important improvements of ! this function, which can be used from C++ and C#: ! * Intel MKL support (lightweight Intel MKL is shipped with ALGLIB) ! * multicore support ! ! 
Intel MKL gives approximately constant (with respect to number of ! worker threads) acceleration factor which depends on CPU being used, ! problem size and "baseline" ALGLIB edition which is used for ! comparison. ! ! Say, on SSE2-capable CPU with N=1024, HPC ALGLIB will be: ! * about 2-3x faster than ALGLIB for C++ without MKL ! * about 7-10x faster than "pure C#" edition of ALGLIB ! Difference in performance will be more striking on newer CPU's with ! support for newer SIMD instructions. Generally, MKL accelerates any ! problem whose size is at least 128, with best efficiency achieved for ! N's larger than 512. ! ! Commercial edition of ALGLIB also supports multithreaded acceleration ! of this function. We should note that matrix inversion is harder to ! parallelize than, say, matrix-matrix product - this algorithm has ! many internal synchronization points which can not be avoided. However ! parallelism starts to be profitable starting from N=1024, achieving ! near-linear speedup for N=4096 or higher. ! ! In order to use multicore features you have to: ! * use commercial version of ALGLIB ! * call this function with "smp_" prefix, which indicates that ! multicore code will be used (for multicore support) ! ! We recommend you to read 'Working with commercial version' section of ! ALGLIB Reference Manual in order to find out how to use performance- ! related features provided by commercial edition of ALGLIB. Input parameters: A - matrix. N - size of matrix A (optional) : * if given, only principal NxN submatrix is processed and overwritten. other elements are unchanged. * if not given, size is automatically determined from matrix size (A must be square matrix) Output parameters: Info - return code, same as in RMatrixLUInverse Rep - solver report, same as in RMatrixLUInverse A - inverse of matrix A, same as in RMatrixLUInverse Result: none - the routine is declared void; a singular (or nearly singular) matrix is reported through Info, same as in RMatrixLUInverse. 
-- ALGLIB -- Copyright 2005-2010 by Bochkanov Sergey *************************************************************************/ public static void rmatrixinverse(ref double[,] a, int n, ref int info, matinvreport rep) { int[] pivots = new int[0]; info = 0; alglib.ap.assert(n>0, "RMatrixInverse: N<=0!"); alglib.ap.assert(alglib.ap.cols(a)>=n, "RMatrixInverse: cols(A)=n, "RMatrixInverse: rows(A)0, "CMatrixLUInverse: N<=0!"); alglib.ap.assert(alglib.ap.cols(a)>=n, "CMatrixLUInverse: cols(A)=n, "CMatrixLUInverse: rows(A)=n, "CMatrixLUInverse: len(Pivots)n-1 || pivots[i]0, "CMatrixLUInverse: incorrect Pivots array!"); // // calculate condition numbers // rep.r1 = rcond.cmatrixlurcond1(a, n); rep.rinf = rcond.cmatrixlurcondinf(a, n); if( (double)(rep.r1)<(double)(rcond.rcondthreshold()) || (double)(rep.rinf)<(double)(rcond.rcondthreshold()) ) { for(i=0; i<=n-1; i++) { for(j=0; j<=n-1; j++) { a[i,j] = 0; } } rep.r1 = 0; rep.rinf = 0; info = -3; return; } // // Call cache-oblivious code // work = new complex[n]; cmatrixluinverserec(ref a, 0, n, ref work, ref info, rep); // // apply permutations // for(i=0; i<=n-1; i++) { for(j=n-2; j>=0; j--) { k = pivots[j]; v = a[i,j]; a[i,j] = a[i,k]; a[i,k] = v; } } } /************************************************************************* Single-threaded stub. HPC ALGLIB replaces it by multithreaded code. *************************************************************************/ public static void _pexec_cmatrixluinverse(ref complex[,] a, int[] pivots, int n, ref int info, matinvreport rep) { cmatrixluinverse(ref a,pivots,n,ref info,rep); } /************************************************************************* Inversion of a general matrix. COMMERCIAL EDITION OF ALGLIB: ! Commercial version of ALGLIB includes two important improvements of ! this function, which can be used from C++ and C#: ! * Intel MKL support (lightweight Intel MKL is shipped with ALGLIB) ! * multicore support ! ! 
Intel MKL gives approximately constant (with respect to number of ! worker threads) acceleration factor which depends on CPU being used, ! problem size and "baseline" ALGLIB edition which is used for ! comparison. ! ! Say, on SSE2-capable CPU with N=1024, HPC ALGLIB will be: ! * about 2-3x faster than ALGLIB for C++ without MKL ! * about 7-10x faster than "pure C#" edition of ALGLIB ! Difference in performance will be more striking on newer CPU's with ! support for newer SIMD instructions. Generally, MKL accelerates any ! problem whose size is at least 128, with best efficiency achieved for ! N's larger than 512. ! ! Commercial edition of ALGLIB also supports multithreaded acceleration ! of this function. We should note that matrix inversion is harder to ! parallelize than, say, matrix-matrix product - this algorithm has ! many internal synchronization points which can not be avoided. However ! parallelism starts to be profitable starting from N=1024, achieving ! near-linear speedup for N=4096 or higher. ! ! In order to use multicore features you have to: ! * use commercial version of ALGLIB ! * call this function with "smp_" prefix, which indicates that ! multicore code will be used (for multicore support) ! ! We recommend you to read 'Working with commercial version' section of ! ALGLIB Reference Manual in order to find out how to use performance- ! related features provided by commercial edition of ALGLIB. Input parameters: A - matrix N - size of matrix A (optional) : * if given, only principal NxN submatrix is processed and overwritten. other elements are unchanged. 
* if not given, size is automatically determined from matrix size (A must be square matrix) Output parameters: Info - return code, same as in RMatrixLUInverse Rep - solver report, same as in RMatrixLUInverse A - inverse of matrix A, same as in RMatrixLUInverse -- ALGLIB -- Copyright 2005 by Bochkanov Sergey *************************************************************************/ public static void cmatrixinverse(ref complex[,] a, int n, ref int info, matinvreport rep) { int[] pivots = new int[0]; info = 0; alglib.ap.assert(n>0, "CRMatrixInverse: N<=0!"); alglib.ap.assert(alglib.ap.cols(a)>=n, "CRMatrixInverse: cols(A)=n, "CRMatrixInverse: rows(A)0, "SPDMatrixCholeskyInverse: N<=0!"); alglib.ap.assert(alglib.ap.cols(a)>=n, "SPDMatrixCholeskyInverse: cols(A)=n, "SPDMatrixCholeskyInverse: rows(A)0, "SPDMatrixInverse: N<=0!"); alglib.ap.assert(alglib.ap.cols(a)>=n, "SPDMatrixInverse: cols(A)=n, "SPDMatrixInverse: rows(A)0, "HPDMatrixCholeskyInverse: N<=0!"); alglib.ap.assert(alglib.ap.cols(a)>=n, "HPDMatrixCholeskyInverse: cols(A)=n, "HPDMatrixCholeskyInverse: rows(A)0, "HPDMatrixInverse: N<=0!"); alglib.ap.assert(alglib.ap.cols(a)>=n, "HPDMatrixInverse: cols(A)=n, "HPDMatrixInverse: rows(A)0, "RMatrixTRInverse: N<=0!"); alglib.ap.assert(alglib.ap.cols(a)>=n, "RMatrixTRInverse: cols(A)=n, "RMatrixTRInverse: rows(A)0, "CMatrixTRInverse: N<=0!"); alglib.ap.assert(alglib.ap.cols(a)>=n, "CMatrixTRInverse: cols(A)=n, "CMatrixTRInverse: rows(A)0, "SPDMatrixCholeskyInverseRec: integrity check failed"); if( isupper ) { // // Compute the product U * U'. // NOTE: we never assume that diagonal of U is real // for(i=0; i<=n-1; i++) { if( i==0 ) { // // 1x1 matrix // a[offs+i,offs+i] = math.sqr(a[offs+i,offs+i]); } else { // // (I+1)x(I+1) matrix, // // ( A11 A12 ) ( A11^H ) ( A11*A11^H+A12*A12^H A12*A22^H ) // ( ) * ( ) = ( ) // ( A22 ) ( A12^H A22^H ) ( A22*A12^H A22*A22^H ) // // A11 is IxI, A22 is 1x1. 
// i1_ = (offs) - (0); for(i_=0; i_<=i-1;i_++) { tmp[i_] = a[i_+i1_,offs+i]; } for(j=0; j<=i-1; j++) { v = a[offs+j,offs+i]; i1_ = (j) - (offs+j); for(i_=offs+j; i_<=offs+i-1;i_++) { a[offs+j,i_] = a[offs+j,i_] + v*tmp[i_+i1_]; } } v = a[offs+i,offs+i]; for(i_=offs; i_<=offs+i-1;i_++) { a[i_,offs+i] = v*a[i_,offs+i]; } a[offs+i,offs+i] = math.sqr(a[offs+i,offs+i]); } } } else { // // Compute the product L' * L // NOTE: we never assume that diagonal of L is real // for(i=0; i<=n-1; i++) { if( i==0 ) { // // 1x1 matrix // a[offs+i,offs+i] = math.sqr(a[offs+i,offs+i]); } else { // // (I+1)x(I+1) matrix, // // ( A11^H A21^H ) ( A11 ) ( A11^H*A11+A21^H*A21 A21^H*A22 ) // ( ) * ( ) = ( ) // ( A22^H ) ( A21 A22 ) ( A22^H*A21 A22^H*A22 ) // // A11 is IxI, A22 is 1x1. // i1_ = (offs) - (0); for(i_=0; i_<=i-1;i_++) { tmp[i_] = a[offs+i,i_+i1_]; } for(j=0; j<=i-1; j++) { v = a[offs+i,offs+j]; i1_ = (0) - (offs); for(i_=offs; i_<=offs+j;i_++) { a[offs+j,i_] = a[offs+j,i_] + v*tmp[i_+i1_]; } } v = a[offs+i,offs+i]; for(i_=offs; i_<=offs+i-1;i_++) { a[offs+i,i_] = v*a[offs+i,i_]; } a[offs+i,offs+i] = math.sqr(a[offs+i,offs+i]); } } } return; } // // Recursive code: triangular factor inversion merged with // UU' or L'L multiplication // ablas.ablassplitlength(a, n, ref n1, ref n2); // // form off-diagonal block of trangular inverse // if( isupper ) { for(i=0; i<=n1-1; i++) { for(i_=offs+n1; i_<=offs+n-1;i_++) { a[offs+i,i_] = -1*a[offs+i,i_]; } } ablas.rmatrixlefttrsm(n1, n2, a, offs, offs, isupper, false, 0, a, offs, offs+n1); ablas.rmatrixrighttrsm(n1, n2, a, offs+n1, offs+n1, isupper, false, 0, a, offs, offs+n1); } else { for(i=0; i<=n2-1; i++) { for(i_=offs; i_<=offs+n1-1;i_++) { a[offs+n1+i,i_] = -1*a[offs+n1+i,i_]; } } ablas.rmatrixrighttrsm(n2, n1, a, offs, offs, isupper, false, 0, a, offs+n1, offs); ablas.rmatrixlefttrsm(n2, n1, a, offs+n1, offs+n1, isupper, false, 0, a, offs+n1, offs); } // // invert first diagonal block // spdmatrixcholeskyinverserec(ref a, offs, n1, 
isupper, ref tmp); // // update first diagonal block with off-diagonal block, // update off-diagonal block // if( isupper ) { ablas.rmatrixsyrk(n1, n2, 1.0, a, offs, offs+n1, 0, 1.0, a, offs, offs, isupper); ablas.rmatrixrighttrsm(n1, n2, a, offs+n1, offs+n1, isupper, false, 1, a, offs, offs+n1); } else { ablas.rmatrixsyrk(n1, n2, 1.0, a, offs+n1, offs, 1, 1.0, a, offs, offs, isupper); ablas.rmatrixlefttrsm(n2, n1, a, offs+n1, offs+n1, isupper, false, 1, a, offs+n1, offs); } // // invert second diagonal block // spdmatrixcholeskyinverserec(ref a, offs+n1, n2, isupper, ref tmp); } /************************************************************************* Triangular matrix inversion, recursive subroutine NOTE: this function sets Info on failure, leaves it unchanged on success. NOTE: only Tmp[Offs:Offs+N-1] is modified, other entries of the temporary array are not modified -- ALGLIB -- 05.02.2010, Bochkanov Sergey. Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., Courant Institute, Argonne National Lab, and Rice University February 29, 1992. *************************************************************************/ private static void rmatrixtrinverserec(double[,] a, int offs, int n, bool isupper, bool isunit, double[] tmp, apserv.sinteger info, matinvreport rep) { int n1 = 0; int n2 = 0; int i = 0; int j = 0; double v = 0; double ajj = 0; int i_ = 0; if( n<1 ) { info.val = -1; return; } // // Base case // if( n<=ablas.ablasblocksize(a) ) { if( isupper ) { // // Compute inverse of upper triangular matrix. // for(j=0; j<=n-1; j++) { if( !isunit ) { if( (double)(a[offs+j,offs+j])==(double)(0) ) { info.val = -3; return; } a[offs+j,offs+j] = 1/a[offs+j,offs+j]; ajj = -a[offs+j,offs+j]; } else { ajj = -1; } // // Compute elements 1:j-1 of j-th column. 
// if( j>0 ) { for(i_=offs+0; i_<=offs+j-1;i_++) { tmp[i_] = a[i_,offs+j]; } for(i=0; i<=j-1; i++) { if( i=0; j--) { if( !isunit ) { if( (double)(a[offs+j,offs+j])==(double)(0) ) { info.val = -3; return; } a[offs+j,offs+j] = 1/a[offs+j,offs+j]; ajj = -a[offs+j,offs+j]; } else { ajj = -1; } if( jj+1 ) { v = 0.0; for(i_=offs+j+1; i_<=offs+i-1;i_++) { v += a[offs+i,i_]*tmp[i_]; } } else { v = 0; } if( !isunit ) { a[offs+i,offs+j] = v+a[offs+i,offs+i]*tmp[offs+i]; } else { a[offs+i,offs+j] = v+tmp[offs+i]; } } for(i_=offs+j+1; i_<=offs+n-1;i_++) { a[i_,offs+j] = ajj*a[i_,offs+j]; } } } } return; } // // Recursive case // ablas.ablassplitlength(a, n, ref n1, ref n2); if( n2>0 ) { if( isupper ) { for(i=0; i<=n1-1; i++) { for(i_=offs+n1; i_<=offs+n-1;i_++) { a[offs+i,i_] = -1*a[offs+i,i_]; } } ablas.rmatrixrighttrsm(n1, n2, a, offs+n1, offs+n1, isupper, isunit, 0, a, offs, offs+n1); ablas.rmatrixlefttrsm(n1, n2, a, offs, offs, isupper, isunit, 0, a, offs, offs+n1); rmatrixtrinverserec(a, offs+n1, n2, isupper, isunit, tmp, info, rep); } else { for(i=0; i<=n2-1; i++) { for(i_=offs; i_<=offs+n1-1;i_++) { a[offs+n1+i,i_] = -1*a[offs+n1+i,i_]; } } ablas.rmatrixlefttrsm(n2, n1, a, offs+n1, offs+n1, isupper, isunit, 0, a, offs+n1, offs); ablas.rmatrixrighttrsm(n2, n1, a, offs, offs, isupper, isunit, 0, a, offs+n1, offs); rmatrixtrinverserec(a, offs+n1, n2, isupper, isunit, tmp, info, rep); } } rmatrixtrinverserec(a, offs, n1, isupper, isunit, tmp, info, rep); } /************************************************************************* Triangular matrix inversion, recursive subroutine -- ALGLIB -- 05.02.2010, Bochkanov Sergey. Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., Courant Institute, Argonne National Lab, and Rice University February 29, 1992. 
*************************************************************************/ private static void cmatrixtrinverserec(ref complex[,] a, int offs, int n, bool isupper, bool isunit, ref complex[] tmp, ref int info, matinvreport rep) { int n1 = 0; int n2 = 0; int i = 0; int j = 0; complex v = 0; complex ajj = 0; int i_ = 0; int i1_ = 0; if( n<1 ) { info = -1; return; } // // Base case // if( n<=ablas.ablascomplexblocksize(a) ) { if( isupper ) { // // Compute inverse of upper triangular matrix. // for(j=0; j<=n-1; j++) { if( !isunit ) { if( a[offs+j,offs+j]==0 ) { info = -3; return; } a[offs+j,offs+j] = 1/a[offs+j,offs+j]; ajj = -a[offs+j,offs+j]; } else { ajj = -1; } // // Compute elements 1:j-1 of j-th column. // if( j>0 ) { i1_ = (offs+0) - (0); for(i_=0; i_<=j-1;i_++) { tmp[i_] = a[i_+i1_,offs+j]; } for(i=0; i<=j-1; i++) { if( i=0; j--) { if( !isunit ) { if( a[offs+j,offs+j]==0 ) { info = -3; return; } a[offs+j,offs+j] = 1/a[offs+j,offs+j]; ajj = -a[offs+j,offs+j]; } else { ajj = -1; } if( jj+1 ) { i1_ = (j+1)-(offs+j+1); v = 0.0; for(i_=offs+j+1; i_<=offs+i-1;i_++) { v += a[offs+i,i_]*tmp[i_+i1_]; } } else { v = 0; } if( !isunit ) { a[offs+i,offs+j] = v+a[offs+i,offs+i]*tmp[i]; } else { a[offs+i,offs+j] = v+tmp[i]; } } for(i_=offs+j+1; i_<=offs+n-1;i_++) { a[i_,offs+j] = ajj*a[i_,offs+j]; } } } } return; } // // Recursive case // ablas.ablascomplexsplitlength(a, n, ref n1, ref n2); if( n2>0 ) { if( isupper ) { for(i=0; i<=n1-1; i++) { for(i_=offs+n1; i_<=offs+n-1;i_++) { a[offs+i,i_] = -1*a[offs+i,i_]; } } ablas.cmatrixlefttrsm(n1, n2, a, offs, offs, isupper, isunit, 0, a, offs, offs+n1); ablas.cmatrixrighttrsm(n1, n2, a, offs+n1, offs+n1, isupper, isunit, 0, a, offs, offs+n1); } else { for(i=0; i<=n2-1; i++) { for(i_=offs; i_<=offs+n1-1;i_++) { a[offs+n1+i,i_] = -1*a[offs+n1+i,i_]; } } ablas.cmatrixrighttrsm(n2, n1, a, offs, offs, isupper, isunit, 0, a, offs+n1, offs); ablas.cmatrixlefttrsm(n2, n1, a, offs+n1, offs+n1, isupper, isunit, 0, a, offs+n1, offs); } 
cmatrixtrinverserec(ref a, offs+n1, n2, isupper, isunit, ref tmp, ref info, rep); } cmatrixtrinverserec(ref a, offs, n1, isupper, isunit, ref tmp, ref info, rep); } private static void rmatrixluinverserec(ref double[,] a, int offs, int n, ref double[] work, apserv.sinteger info, matinvreport rep) { int i = 0; int j = 0; double v = 0; int n1 = 0; int n2 = 0; int i_ = 0; int i1_ = 0; if( n<1 ) { info.val = -1; return; } // // Base case // if( n<=ablas.ablasblocksize(a) ) { // // Form inv(U) // rmatrixtrinverserec(a, offs, n, true, false, work, info, rep); if( info.val<=0 ) { return; } // // Solve the equation inv(A)*L = inv(U) for inv(A). // for(j=n-1; j>=0; j--) { // // Copy current column of L to WORK and replace with zeros. // for(i=j+1; i<=n-1; i++) { work[i] = a[offs+i,offs+j]; a[offs+i,offs+j] = 0; } // // Compute current column of inv(A). // if( j0, "LUInverseRec: internal error!"); // // X := inv(U1)*U12*inv(U2) // ablas.rmatrixlefttrsm(n1, n2, a, offs, offs, true, false, 0, a, offs, offs+n1); ablas.rmatrixrighttrsm(n1, n2, a, offs+n1, offs+n1, true, false, 0, a, offs, offs+n1); // // Y := inv(L2)*L12*inv(L1) // ablas.rmatrixlefttrsm(n2, n1, a, offs+n1, offs+n1, false, true, 0, a, offs+n1, offs); ablas.rmatrixrighttrsm(n2, n1, a, offs, offs, false, true, 0, a, offs+n1, offs); // // W := inv(L1*U1)+X*Y // rmatrixluinverserec(ref a, offs, n1, ref work, info, rep); if( info.val<=0 ) { return; } ablas.rmatrixgemm(n1, n1, n2, 1.0, a, offs, offs+n1, 0, a, offs+n1, offs, 0, 1.0, a, offs, offs); // // X := -X*inv(L2) // Y := -inv(U2)*Y // ablas.rmatrixrighttrsm(n1, n2, a, offs+n1, offs+n1, false, true, 0, a, offs, offs+n1); for(i=0; i<=n1-1; i++) { for(i_=offs+n1; i_<=offs+n-1;i_++) { a[offs+i,i_] = -1*a[offs+i,i_]; } } ablas.rmatrixlefttrsm(n2, n1, a, offs+n1, offs+n1, true, false, 0, a, offs+n1, offs); for(i=0; i<=n2-1; i++) { for(i_=offs; i_<=offs+n1-1;i_++) { a[offs+n1+i,i_] = -1*a[offs+n1+i,i_]; } } // // Z := inv(L2*U2) // rmatrixluinverserec(ref a, offs+n1, 
n2, ref work, info, rep); } private static void cmatrixluinverserec(ref complex[,] a, int offs, int n, ref complex[] work, ref int info, matinvreport rep) { int i = 0; int j = 0; complex v = 0; int n1 = 0; int n2 = 0; int i_ = 0; int i1_ = 0; if( n<1 ) { info = -1; return; } // // Base case // if( n<=ablas.ablascomplexblocksize(a) ) { // // Form inv(U) // cmatrixtrinverserec(ref a, offs, n, true, false, ref work, ref info, rep); if( info<=0 ) { return; } // // Solve the equation inv(A)*L = inv(U) for inv(A). // for(j=n-1; j>=0; j--) { // // Copy current column of L to WORK and replace with zeros. // for(i=j+1; i<=n-1; i++) { work[i] = a[offs+i,offs+j]; a[offs+i,offs+j] = 0; } // // Compute current column of inv(A). // if( j0, "LUInverseRec: internal error!"); // // X := inv(U1)*U12*inv(U2) // ablas.cmatrixlefttrsm(n1, n2, a, offs, offs, true, false, 0, a, offs, offs+n1); ablas.cmatrixrighttrsm(n1, n2, a, offs+n1, offs+n1, true, false, 0, a, offs, offs+n1); // // Y := inv(L2)*L12*inv(L1) // ablas.cmatrixlefttrsm(n2, n1, a, offs+n1, offs+n1, false, true, 0, a, offs+n1, offs); ablas.cmatrixrighttrsm(n2, n1, a, offs, offs, false, true, 0, a, offs+n1, offs); // // W := inv(L1*U1)+X*Y // cmatrixluinverserec(ref a, offs, n1, ref work, ref info, rep); if( info<=0 ) { return; } ablas.cmatrixgemm(n1, n1, n2, 1.0, a, offs, offs+n1, 0, a, offs+n1, offs, 0, 1.0, a, offs, offs); // // X := -X*inv(L2) // Y := -inv(U2)*Y // ablas.cmatrixrighttrsm(n1, n2, a, offs+n1, offs+n1, false, true, 0, a, offs, offs+n1); for(i=0; i<=n1-1; i++) { for(i_=offs+n1; i_<=offs+n-1;i_++) { a[offs+i,i_] = -1*a[offs+i,i_]; } } ablas.cmatrixlefttrsm(n2, n1, a, offs+n1, offs+n1, true, false, 0, a, offs+n1, offs); for(i=0; i<=n2-1; i++) { for(i_=offs; i_<=offs+n1-1;i_++) { a[offs+n1+i,i_] = -1*a[offs+n1+i,i_]; } } // // Z := inv(L2*U2) // cmatrixluinverserec(ref a, offs+n1, n2, ref work, ref info, rep); } /************************************************************************* Recursive subroutine for 
HPD inversion.

Works in place on the N-by-N diagonal block of A whose top-left element is
A[Offs,Offs]. On entry the block holds a Cholesky factor in the triangle
selected by IsUpper; the routine inverts that factor and then overwrites
the same triangle with U*U^H (upper case) or L^H*L (lower case).

Parameters:
    A       -   matrix, modified in place
    Offs    -   row/column offset of the diagonal block being processed
    N       -   size of the block; nothing is done when N is below 1
    IsUpper -   True to work on the upper triangle, False for the lower one
    Tmp     -   scratch buffer; the base case writes Tmp[0..N-2] and also
                hands the buffer to CMatrixTRInverseRec

NOTE: the status code returned by CMatrixTRInverseRec in the base case is
      stored in a local and never inspected.

  -- ALGLIB routine --
     10.02.2010
     Bochkanov Sergey
*************************************************************************/
        private static void hpdmatrixcholeskyinverserec(ref complex[,] a,
            int offs,
            int n,
            bool isupper,
            ref complex[] tmp)
        {
            int row = 0;
            int col = 0;
            int k = 0;
            int d = 0;
            complex factor = 0;
            int n1 = 0;
            int n2 = 0;
            int info2 = 0;
            matinvreport rep2 = new matinvreport();

            if( n<1 )
            {
                return;
            }

            //
            // Base case: invert the triangular factor, then build the product
            // one bordered (Row+1)x(Row+1) leading block at a time.
            //
            if( n<=ablas.ablascomplexblocksize(a) )
            {
                cmatrixtrinverserec(ref a, offs, n, isupper, false, ref tmp, ref info2, rep2);
                for(row=0; row<=n-1; row++)
                {
                    //
                    // D is the absolute index of the current diagonal element.
                    // For Row=0 every inner loop below is empty, so only the
                    // final diagonal update runs (the 1x1 case).
                    //
                    d = offs+row;
                    if( isupper )
                    {
                        //
                        // ( A11 A12 )   ( A11^H       )   ( A11*A11^H+A12*A12^H  A12*A22^H )
                        // (         ) * (             ) = (                                )
                        // (     A22 )   ( A12^H A22^H )   ( A22*A12^H            A22*A22^H )
                        //
                        // A11 is RowxRow, A22 is 1x1.
                        // NOTE: we never assume that diagonal of U is real
                        //
                        for(k=0; k<=row-1; k++)
                        {
                            tmp[k] = math.conj(a[offs+k,d]);
                        }
                        for(col=0; col<=row-1; col++)
                        {
                            factor = a[offs+col,d];
                            for(k=col; k<=row-1; k++)
                            {
                                a[offs+col,offs+k] += factor*tmp[k];
                            }
                        }
                        factor = math.conj(a[d,d]);
                        for(k=0; k<=row-1; k++)
                        {
                            a[offs+k,d] = factor*a[offs+k,d];
                        }
                    }
                    else
                    {
                        //
                        // ( A11^H A21^H )   ( A11     )   ( A11^H*A11+A21^H*A21  A21^H*A22 )
                        // (             ) * (         ) = (                                )
                        // (       A22^H )   ( A21 A22 )   ( A22^H*A21            A22^H*A22 )
                        //
                        // A11 is RowxRow, A22 is 1x1.
                        // NOTE: we never assume that diagonal of L is real
                        //
                        for(k=0; k<=row-1; k++)
                        {
                            tmp[k] = a[d,offs+k];
                        }
                        for(col=0; col<=row-1; col++)
                        {
                            factor = math.conj(a[d,offs+col]);
                            for(k=0; k<=col; k++)
                            {
                                a[offs+col,offs+k] += factor*tmp[k];
                            }
                        }
                        factor = math.conj(a[d,d]);
                        for(k=0; k<=row-1; k++)
                        {
                            a[d,offs+k] = factor*a[d,offs+k];
                        }
                    }

                    //
                    // Diagonal element becomes its own squared modulus.
                    //
                    a[d,d] = math.sqr(a[d,d].x)+math.sqr(a[d,d].y);
                }
                return;
            }

            //
            // Recursive code: triangular factor inversion merged with
            // UU' or L'L multiplication
            //
            ablas.ablascomplexsplitlength(a, n, ref n1, ref n2);

            //
            // form off-diagonal block of triangular inverse
            //
            if( isupper )
            {
                for(row=0; row<=n1-1; row++)
                {
                    for(k=offs+n1; k<=offs+n-1; k++)
                    {
                        a[offs+row,k] = -1*a[offs+row,k];
                    }
                }
                ablas.cmatrixlefttrsm(n1, n2, a, offs, offs, isupper, false, 0, a, offs, offs+n1);
                ablas.cmatrixrighttrsm(n1, n2, a, offs+n1, offs+n1, isupper, false, 0, a, offs, offs+n1);
            }
            else
            {
                for(row=0; row<=n2-1; row++)
                {
                    for(k=offs; k<=offs+n1-1; k++)
                    {
                        a[offs+n1+row,k] = -1*a[offs+n1+row,k];
                    }
                }
                ablas.cmatrixrighttrsm(n2, n1, a, offs, offs, isupper, false, 0, a, offs+n1, offs);
                ablas.cmatrixlefttrsm(n2, n1, a, offs+n1, offs+n1, isupper, false, 0, a, offs+n1, offs);
            }

            //
            // invert first diagonal block
            //
            hpdmatrixcholeskyinverserec(ref a, offs, n1, isupper, ref tmp);

            //
            // update first diagonal block with off-diagonal block,
            // update off-diagonal block
            //
            if( isupper )
            {
                ablas.cmatrixherk(n1, n2, 1.0, a, offs, offs+n1, 0, 1.0, a, offs, offs, isupper);
                ablas.cmatrixrighttrsm(n1, n2, a, offs+n1, offs+n1, isupper, false, 2, a, offs, offs+n1);
            }
            else
            {
                ablas.cmatrixherk(n1, n2, 1.0, a, offs+n1, offs, 2, 1.0, a, offs, offs, isupper);
                ablas.cmatrixlefttrsm(n2, n1, a, offs+n1, offs+n1, isupper, false, 2, a, offs+n1, offs);
            }

            //
            // invert second diagonal block
            //
            hpdmatrixcholeskyinverserec(ref a, offs+n1, n2, isupper, ref tmp);
        }


    }
    public class fbls
    {
        /*************************************************************************
        Structure which stores state
of linear CG solver between subsequent calls of FBLSCgIteration(). Initialized with FBLSCGCreate(). USAGE: 1. call to FBLSCGCreate() 2. F:=FBLSCgIteration(State) 3. if F is False, iterations are over 4. otherwise, fill State.AX with A*x, State.XAX with x'*A*x 5. goto 2 If you want to terminate iterations, pass zero or negative value to XAX. FIELDS: E1 - 2-norm of residual at the start E2 - 2-norm of residual at the end X - on return from FBLSCgIteration() it contains vector for matrix-vector product AX - must be filled with A*x if FBLSCgIteration() returned True XAX - must be filled with x'*A*x XK - contains result (if FBLSCgIteration() returned False) Other fields are private and should not be used by outsiders. *************************************************************************/ public class fblslincgstate : apobject { public double e1; public double e2; public double[] x; public double[] ax; public double xax; public int n; public double[] rk; public double[] rk1; public double[] xk; public double[] xk1; public double[] pk; public double[] pk1; public double[] b; public rcommstate rstate; public double[] tmp2; public fblslincgstate() { init(); } public override void init() { x = new double[0]; ax = new double[0]; rk = new double[0]; rk1 = new double[0]; xk = new double[0]; xk1 = new double[0]; pk = new double[0]; pk1 = new double[0]; b = new double[0]; rstate = new rcommstate(); tmp2 = new double[0]; } public override alglib.apobject make_copy() { fblslincgstate _result = new fblslincgstate(); _result.e1 = e1; _result.e2 = e2; _result.x = (double[])x.Clone(); _result.ax = (double[])ax.Clone(); _result.xax = xax; _result.n = n; _result.rk = (double[])rk.Clone(); _result.rk1 = (double[])rk1.Clone(); _result.xk = (double[])xk.Clone(); _result.xk1 = (double[])xk1.Clone(); _result.pk = (double[])pk.Clone(); _result.pk1 = (double[])pk1.Clone(); _result.b = (double[])b.Clone(); _result.rstate = (rcommstate)rstate.make_copy(); _result.tmp2 =
(double[])tmp2.Clone(); return _result; } }; /************************************************************************* Basic Cholesky solver for ScaleA*Cholesky(A)'*x = y. This subroutine assumes that: * A*ScaleA is well scaled * A is well-conditioned, so no zero divisions or overflow may occur INPUT PARAMETERS: CHA - Cholesky decomposition of A SqrtScaleA- square root of scale factor ScaleA N - matrix size, N>=0. IsUpper - storage type XB - right part Tmp - buffer; function automatically allocates it, if it is too small. It can be reused if function is called several times. OUTPUT PARAMETERS: XB - solution NOTE 1: no assertion or tests are done during algorithm operation NOTE 2: N=0 will force algorithm to silently return -- ALGLIB -- Copyright 13.10.2010 by Bochkanov Sergey *************************************************************************/ /* NOTE(review): the body of fblscholeskysolve is incomplete in this copy of the file: the buffer-length test "alglib.ap.len(tmp)" runs straight into "=0; i--)" and the next test reads "if( i0 )", so the buffer reallocation, the whole upper-triangle branch and the start of the lower-triangle forward substitution are missing. Only the tail of the lower-triangle branch survives. Restore the routine from the upstream ALGLIB 3.9.0 sources before relying on it. */ public static void fblscholeskysolve(double[,] cha, double sqrtscalea, int n, bool isupper, double[] xb, ref double[] tmp) { int i = 0; double v = 0; int i_ = 0; if( n==0 ) { return; } if( alglib.ap.len(tmp)=0; i--) { if( i0 ) { for(i_=0; i_<=i-1;i_++) { tmp[i_] = sqrtscalea*cha[i,i_]; } v = 0.0; for(i_=0; i_<=i-1;i_++) { v += tmp[i_]*xb[i_]; } xb[i] = xb[i]-v; } xb[i] = xb[i]/(sqrtscalea*cha[i,i]); } // // Solve L'*x=y then. // for(i=n-1; i>=0; i--) { xb[i] = xb[i]/(sqrtscalea*cha[i,i]); if( i>0 ) { v = xb[i]; for(i_=0; i_<=i-1;i_++) { tmp[i_] = sqrtscalea*cha[i,i_]; } for(i_=0; i_<=i-1;i_++) { xb[i_] = xb[i_] - v*tmp[i_]; } } } } } /************************************************************************* Fast basic linear solver: linear SPD CG Solves (A^T*A + alpha*I)*x = b where: * A is MxN matrix * alpha>0 is a scalar * I is NxN identity matrix * b is Nx1 vector * X is Nx1 unknown vector. N iterations of linear conjugate gradient are used to solve problem.
INPUT PARAMETERS: A - array[M,N], matrix M - number of rows N - number of unknowns B - array[N], right part X - initial approximation, array[N] Buf - buffer; function automatically allocates it, if it is too small. It can be reused if function is called several times with same M and N. OUTPUT PARAMETERS: X - improved solution NOTES: * solver checks quality of improved solution. If (because of problem condition number, numerical noise, etc.) new solution is WORSE than original approximation, then original approximation is returned. * solver assumes that both A, B, Alpha are well scaled (i.e. they are less than sqrt(overflow) and greater than sqrt(underflow)). -- ALGLIB -- Copyright 20.08.2009 by Bochkanov Sergey *************************************************************************/ /* NOTE(review): text has been lost from this copy of the file inside fblssolvecgx: the buffer-size test "alglib.ap.len(buf)" runs straight into "=0 ) { n = state.rstate.ia[0];", which reads a variable named state that fblssolvecgx does not declare. Everything from that point to the final "return result;" looks like the body of a separate reverse-communication routine (presumably FBLSCgIteration, which the comment on fblslincgstate refers to); its header, the rest of fblssolvecgx and any definitions in between are missing. Restore this region from the upstream ALGLIB 3.9.0 sources before relying on it. */ public static void fblssolvecgx(double[,] a, int m, int n, double alpha, double[] b, ref double[] x, ref double[] buf) { int k = 0; int offsrk = 0; int offsrk1 = 0; int offsxk = 0; int offsxk1 = 0; int offspk = 0; int offspk1 = 0; int offstmp1 = 0; int offstmp2 = 0; int bs = 0; double e1 = 0; double e2 = 0; double rk2 = 0; double rk12 = 0; double pap = 0; double s = 0; double betak = 0; double v1 = 0; double v2 = 0; int i_ = 0; int i1_ = 0; // // Test for special case: B=0 // v1 = 0.0; for(i_=0; i_<=n-1;i_++) { v1 += b[i_]*b[i_]; } if( (double)(v1)==(double)(0) ) { for(k=0; k<=n-1; k++) { x[k] = 0; } return; } // // Offsets inside Buf for: // * R[K], R[K+1] // * X[K], X[K+1] // * P[K], P[K+1] // * Tmp1 - array[M], Tmp2 - array[N] // offsrk = 0; offsrk1 = offsrk+n; offsxk = offsrk1+n; offsxk1 = offsxk+n; offspk = offsxk1+n; offspk1 = offspk+n; offstmp1 = offspk1+n; offstmp2 = offstmp1+m; bs = offstmp2+n; if( alglib.ap.len(buf)=0 ) { n = state.rstate.ia[0]; k = state.rstate.ia[1]; rk2 = state.rstate.ra[0]; rk12 = state.rstate.ra[1]; pap = state.rstate.ra[2]; s = state.rstate.ra[3]; betak = state.rstate.ra[4]; v1 = state.rstate.ra[5]; v2 = state.rstate.ra[6]; } else { n = -983; k = -989;
rk2 = -834; rk12 = 900; pap = -287; s = 364; betak = 214; v1 = -338; v2 = -686; } if( state.rstate.stage==0 ) { goto lbl_0; } if( state.rstate.stage==1 ) { goto lbl_1; } if( state.rstate.stage==2 ) { goto lbl_2; } // // Routine body // // // prepare locals // n = state.n; // // Test for special case: B=0 // v1 = 0.0; for(i_=0; i_<=n-1;i_++) { v1 += state.b[i_]*state.b[i_]; } if( (double)(v1)==(double)(0) ) { for(k=0; k<=n-1; k++) { state.xk[k] = 0; } result = false; return result; } // // r(0) = b-A*x(0) // RK2 = r(0)'*r(0) // for(i_=0; i_<=n-1;i_++) { state.x[i_] = state.xk[i_]; } state.rstate.stage = 0; goto lbl_rcomm; lbl_0: for(i_=0; i_<=n-1;i_++) { state.rk[i_] = state.b[i_]; } for(i_=0; i_<=n-1;i_++) { state.rk[i_] = state.rk[i_] - state.ax[i_]; } rk2 = 0.0; for(i_=0; i_<=n-1;i_++) { rk2 += state.rk[i_]*state.rk[i_]; } for(i_=0; i_<=n-1;i_++) { state.pk[i_] = state.rk[i_]; } state.e1 = Math.Sqrt(rk2); // // Cycle // k = 0; lbl_3: if( k>n-1 ) { goto lbl_5; } // // Calculate A*p(k) - store in State.Tmp2 // and p(k)'*A*p(k) - store in PAP // // If PAP=0, break (iteration is over) // for(i_=0; i_<=n-1;i_++) { state.x[i_] = state.pk[i_]; } state.rstate.stage = 1; goto lbl_rcomm; lbl_1: for(i_=0; i_<=n-1;i_++) { state.tmp2[i_] = state.ax[i_]; } pap = state.xax; if( !math.isfinite(pap) ) { goto lbl_5; } if( (double)(pap)<=(double)(0) ) { goto lbl_5; } // // S = (r(k)'*r(k))/(p(k)'*A*p(k)) // s = rk2/pap; // // x(k+1) = x(k) + S*p(k) // for(i_=0; i_<=n-1;i_++) { state.xk1[i_] = state.xk[i_]; } for(i_=0; i_<=n-1;i_++) { state.xk1[i_] = state.xk1[i_] + s*state.pk[i_]; } // // r(k+1) = r(k) - S*A*p(k) // RK12 = r(k+1)'*r(k+1) // // Break if r(k+1) small enough (when compared to r(k)) // for(i_=0; i_<=n-1;i_++) { state.rk1[i_] = state.rk[i_]; } for(i_=0; i_<=n-1;i_++) { state.rk1[i_] = state.rk1[i_] - s*state.tmp2[i_]; } rk12 = 0.0; for(i_=0; i_<=n-1;i_++) { rk12 += state.rk1[i_]*state.rk1[i_]; } if( (double)(Math.Sqrt(rk12))<=(double)(100*math.machineepsilon*state.e1) )
{ // // X(k) = x(k+1) before exit - // - because we expect to find solution at x(k) // for(i_=0; i_<=n-1;i_++) { state.xk[i_] = state.xk1[i_]; } goto lbl_5; } // // BetaK = RK12/RK2 // p(k+1) = r(k+1)+betak*p(k) // // NOTE: we expect that BetaK won't overflow because of // "Sqrt(RK12)<=100*MachineEpsilon*E1" test above. // betak = rk12/rk2; for(i_=0; i_<=n-1;i_++) { state.pk1[i_] = state.rk1[i_]; } for(i_=0; i_<=n-1;i_++) { state.pk1[i_] = state.pk1[i_] + betak*state.pk[i_]; } // // r(k) := r(k+1) // x(k) := x(k+1) // p(k) := p(k+1) // for(i_=0; i_<=n-1;i_++) { state.rk[i_] = state.rk1[i_]; } for(i_=0; i_<=n-1;i_++) { state.xk[i_] = state.xk1[i_]; } for(i_=0; i_<=n-1;i_++) { state.pk[i_] = state.pk1[i_]; } rk2 = rk12; k = k+1; goto lbl_3; lbl_5: // // Calculate E2 // for(i_=0; i_<=n-1;i_++) { state.x[i_] = state.xk[i_]; } state.rstate.stage = 2; goto lbl_rcomm; lbl_2: for(i_=0; i_<=n-1;i_++) { state.rk[i_] = state.b[i_]; } for(i_=0; i_<=n-1;i_++) { state.rk[i_] = state.rk[i_] - state.ax[i_]; } v1 = 0.0; for(i_=0; i_<=n-1;i_++) { v1 += state.rk[i_]*state.rk[i_]; } state.e2 = Math.Sqrt(v1); result = false; return result; // // Saving state // lbl_rcomm: result = true; state.rstate.ia[0] = n; state.rstate.ia[1] = k; state.rstate.ra[0] = rk2; state.rstate.ra[1] = rk12; state.rstate.ra[2] = pap; state.rstate.ra[3] = s; state.rstate.ra[4] = betak; state.rstate.ra[5] = v1; state.rstate.ra[6] = v2; return result; } /************************************************************************* Fast least squares solver, solves well conditioned system without performing any checks for degeneracy, and using user-provided buffers (which are automatically reallocated if too small). This function is intended for solution of moderately sized systems. It uses factorization algorithms based on Level 2 BLAS operations, thus it won't work efficiently on large scale systems. INPUT PARAMETERS: A - array[M,N], system matrix. Contents of A are destroyed during solution.
B - array[M], right part M - number of equations N - number of variables, N<=M Tmp0, Tmp1, Tmp2- buffers; function automatically allocates them, if they are too small. They can be reused if function is called several times. OUTPUT PARAMETERS: B - solution (first N components, next M-N are zero) -- ALGLIB -- Copyright 20.01.2012 by Bochkanov Sergey *************************************************************************/ /* NOTE(review): the body of fblssolvels is incomplete in this copy of the file: the chain of argument checks collapses into fragments such as "FBLSSolveLS: M=m," and then jumps to "=0; i--)", so the remaining asserts, the factorization step and the start of the back substitution are missing. Only the tail of the back substitution and the zeroing of B[N..M-1] survive. Restore the routine from the upstream ALGLIB 3.9.0 sources before relying on it. */ public static void fblssolvels(ref double[,] a, ref double[] b, int m, int n, ref double[] tmp0, ref double[] tmp1, ref double[] tmp2) { int i = 0; int k = 0; double v = 0; int i_ = 0; alglib.ap.assert(n>0, "FBLSSolveLS: N<=0"); alglib.ap.assert(m>=n, "FBLSSolveLS: M=m, "FBLSSolveLS: Rows(A)=n, "FBLSSolveLS: Cols(A)=m, "FBLSSolveLS: Length(B)=0; i--) { v = 0.0; for(i_=i+1; i_<=n-1;i_++) { v += a[i,i_]*b[i_]; } b[i] = (b[i]-v)/a[i,i]; } for(i=n; i<=m-1; i++) { b[i] = 0.0; } } } public class normestimator { /************************************************************************* This object stores state of the iterative norm estimation algorithm. You should use ALGLIB functions to work with this object.
*************************************************************************/
        public class normestimatorstate : apobject
        {
            // Problem dimensions and iteration counts, set by normestimatorcreate().
            public int n;
            public int m;
            public int nstart;
            public int nits;

            // Seed selector; the iteration reseeds the generator only when it is positive.
            public int seedval;

            // Work vectors.
            public double[] x0;
            public double[] x1;
            public double[] t;
            public double[] xbest;

            // Random number generator state.
            public hqrnd.hqrndstate r;

            // Reverse-communication exchange area: X is the vector handed to the
            // caller, MV / MTV receive the products, NeedMV / NeedMTV say which
            // product is being requested.
            public double[] x;
            public double[] mv;
            public double[] mtv;
            public bool needmv;
            public bool needmtv;

            // Latest norm estimate.
            public double repnorm;

            // Saved locals and stage of the reverse-communication routine.
            public rcommstate rstate;

            public normestimatorstate()
            {
                init();
            }

            // Puts every reference-typed field into an empty but non-null state.
            public override void init()
            {
                this.x0 = new double[0];
                this.x1 = new double[0];
                this.t = new double[0];
                this.xbest = new double[0];
                this.r = new hqrnd.hqrndstate();
                this.x = new double[0];
                this.mv = new double[0];
                this.mtv = new double[0];
                this.rstate = new rcommstate();
            }

            // Returns a deep copy: arrays are cloned, nested objects are copied
            // through their own make_copy().
            public override alglib.apobject make_copy()
            {
                normestimatorstate copy = new normestimatorstate();

                copy.n = this.n;
                copy.m = this.m;
                copy.nstart = this.nstart;
                copy.nits = this.nits;
                copy.seedval = this.seedval;

                copy.x0 = (double[])this.x0.Clone();
                copy.x1 = (double[])this.x1.Clone();
                copy.t = (double[])this.t.Clone();
                copy.xbest = (double[])this.xbest.Clone();
                copy.r = (hqrnd.hqrndstate)this.r.make_copy();

                copy.x = (double[])this.x.Clone();
                copy.mv = (double[])this.mv.Clone();
                copy.mtv = (double[])this.mtv.Clone();
                copy.needmv = this.needmv;
                copy.needmtv = this.needmtv;

                copy.repnorm = this.repnorm;
                copy.rstate = (rcommstate)this.rstate.make_copy();
                return copy;
            }
        };




        /*************************************************************************
        This procedure initializes matrix norm estimator.

        USAGE:
        1. User initializes algorithm state with NormEstimatorCreate() call
        2. User calls NormEstimatorEstimateSparse() (or NormEstimatorIteration())
        3. User calls NormEstimatorResults() to get solution.

        INPUT PARAMETERS:
            M       -   number of rows in the matrix being estimated, M>0
            N       -   number of columns in the matrix being estimated, N>0
            NStart  -   number of random starting vectors
                        recommended value - at least 5.
NIts    -   number of iterations to do with best starting vector
            recommended value - at least 5.

        OUTPUT PARAMETERS:
            State   -   structure which stores algorithm state


        NOTE: this algorithm is effectively deterministic, i.e. it always returns
        same result when repeatedly called for the same matrix. In fact, algorithm
        uses randomized starting vectors, but internal random numbers generator
        always generates same sequence of the random values (it is a feature, not
        bug).

        Algorithm can be made non-deterministic with NormEstimatorSetSeed(0) call.

          -- ALGLIB --
             Copyright 06.12.2011 by Bochkanov Sergey
        *************************************************************************/
        public static void normestimatorcreate(int m,
            int n,
            int nstart,
            int nits,
            normestimatorstate state)
        {
            alglib.ap.assert(m>0, "NormEstimatorCreate: M<=0");
            alglib.ap.assert(n>0, "NormEstimatorCreate: N<=0");
            alglib.ap.assert(nstart>0, "NormEstimatorCreate: NStart<=0");
            alglib.ap.assert(nits>0, "NormEstimatorCreate: NIts<=0");

            //
            // Problem description and default seed
            //
            state.m = m;
            state.n = n;
            state.nstart = nstart;
            state.nits = nits;
            state.seedval = 11;
            hqrnd.hqrndrandomize(state.r);

            //
            // Work vectors; X must hold either an N-vector or an M-vector
            //
            state.x0 = new double[state.n];
            state.t = new double[state.m];
            state.x1 = new double[state.n];
            state.xbest = new double[state.n];
            state.x = new double[Math.Max(state.n, state.m)];
            state.mv = new double[state.m];
            state.mtv = new double[state.n];

            //
            // Reverse-communication bookkeeping: same reset as a restart
            //
            normestimatorrestart(state);
        }


        /*************************************************************************
        This function changes seed value used by algorithm. In some cases we need
        deterministic processing, i.e. subsequent calls must return equal results,
        in other cases we need non-deterministic algorithm which returns different
        results for the same matrix on every pass.

        Setting zero seed will lead to non-deterministic algorithm, while non-zero
        value will make our algorithm deterministic.
INPUT PARAMETERS:
            State       -   norm estimator state, must be initialized with a  call
                            to NormEstimatorCreate()
            SeedVal     -   seed value, >=0. Zero value = non-deterministic algo.

          -- ALGLIB --
             Copyright 06.12.2011 by Bochkanov Sergey
        *************************************************************************/
        public static void normestimatorsetseed(normestimatorstate state,
            int seedval)
        {
            alglib.ap.assert(seedval>=0, "NormEstimatorSetSeed: SeedVal<0");
            state.seedval = seedval;
        }


        /*************************************************************************
        Reverse-communication driver of the norm estimator.

        Returns True when the caller must compute a product and call again:
        * NeedMV  set - store the product of the matrix and State.X into State.MV
        * NeedMTV set - store the product of the transposed matrix and State.X
                        into State.MTV
        Returns False when the run is over; the estimate is then in State.RepNorm.

        The routine first tries NStart random unit vectors and keeps the one with
        the largest growth, then performs NIts further iterations starting from
        that vector.

          -- ALGLIB --
             Copyright 06.12.2011 by Bochkanov Sergey
        *************************************************************************/
        public static bool normestimatoriteration(normestimatorstate state)
        {
            bool result = false;
            int n = 0;
            int m = 0;
            int i = 0;
            int itcnt = 0;
            double v = 0;
            double growth = 0;
            double bestgrowth = 0;
            int k = 0;

            //
            // Reverse communication preparations: locals are either restored
            // from the previous call or filled with arbitrary placeholder
            // values on the very first call.
            //
            if( state.rstate.stage>=0 )
            {
                n = state.rstate.ia[0];
                m = state.rstate.ia[1];
                i = state.rstate.ia[2];
                itcnt = state.rstate.ia[3];
                v = state.rstate.ra[0];
                growth = state.rstate.ra[1];
                bestgrowth = state.rstate.ra[2];
            }
            else
            {
                n = -983;
                m = -989;
                i = -834;
                itcnt = 900;
                v = -287;
                growth = 364;
                bestgrowth = 214;
            }

            //
            // Resume at the point where the previous call stopped
            //
            switch( state.rstate.stage )
            {
                case 0:
                    goto lbl_0;
                case 1:
                    goto lbl_1;
                case 2:
                    goto lbl_2;
                case 3:
                    goto lbl_3;
                default:
                    break;
            }

            //
            // Routine body
            //
            n = state.n;
            m = state.m;
            if( state.seedval>0 )
            {
                hqrnd.hqrndseed(state.seedval, state.seedval+2, state.r);
            }
            bestgrowth = 0;
            state.xbest[0] = 1;
            for(i=1; i<=n-1; i++)
            {
                state.xbest[i] = 0;
            }

            //
            // Phase 1: try NStart random starting vectors
            //
            itcnt = 0;
        lbl_4:
            if( itcnt>state.nstart-1 )
            {
                goto lbl_6;
            }
            do
            {
                v = 0;
                for(i=0; i<=n-1; i++)
                {
                    state.x0[i] = hqrnd.hqrndnormal(state.r);
                    v += math.sqr(state.x0[i]);
                }
            }
            while( (double)(v)==(double)(0) );
            v = 1/Math.Sqrt(v);
            for(k=0; k<=n-1; k++)
            {
                state.x0[k] = v*state.x0[k];
            }
            for(k=0; k<=n-1; k++)
            {
                state.x[k] = state.x0[k];
            }
            state.needmv = true;
            state.needmtv = false;
            state.rstate.stage = 0;
            goto lbl_rcomm;
        lbl_0:
            for(k=0; k<=m-1; k++)
            {
                state.x[k] = state.mv[k];
            }
            state.needmv = false;
            state.needmtv = true;
            state.rstate.stage = 1;
            goto lbl_rcomm;
        lbl_1:
            for(k=0; k<=n-1; k++)
            {
                state.x1[k] = state.mtv[k];
            }
            v = 0;
            for(i=0; i<=n-1; i++)
            {
                v += math.sqr(state.x1[i]);
            }
            growth = Math.Sqrt(Math.Sqrt(v));
            if( (double)(growth)>(double)(bestgrowth) )
            {
                v = 1/Math.Sqrt(v);
                for(k=0; k<=n-1; k++)
                {
                    state.xbest[k] = v*state.x1[k];
                }
                bestgrowth = growth;
            }
            itcnt++;
            goto lbl_4;

            //
            // Phase 2: NIts iterations from the best starting vector
            //
        lbl_6:
            for(k=0; k<=n-1; k++)
            {
                state.x0[k] = state.xbest[k];
            }
            itcnt = 0;
        lbl_7:
            if( itcnt>state.nits-1 )
            {
                goto lbl_9;
            }
            for(k=0; k<=n-1; k++)
            {
                state.x[k] = state.x0[k];
            }
            state.needmv = true;
            state.needmtv = false;
            state.rstate.stage = 2;
            goto lbl_rcomm;
        lbl_2:
            for(k=0; k<=m-1; k++)
            {
                state.x[k] = state.mv[k];
            }
            state.needmv = false;
            state.needmtv = true;
            state.rstate.stage = 3;
            goto lbl_rcomm;
        lbl_3:
            for(k=0; k<=n-1; k++)
            {
                state.x1[k] = state.mtv[k];
            }
            v = 0;
            for(i=0; i<=n-1; i++)
            {
                v += math.sqr(state.x1[i]);
            }
            state.repnorm = Math.Sqrt(Math.Sqrt(v));
            if( (double)(v)!=(double)(0) )
            {
                v = 1/Math.Sqrt(v);
                for(k=0; k<=n-1; k++)
                {
                    state.x0[k] = v*state.x1[k];
                }
            }
            itcnt++;
            goto lbl_7;
        lbl_9:
            result = false;
            return result;

            //
            // Saving state
            //
        lbl_rcomm:
            result = true;
            state.rstate.ia[0] = n;
            state.rstate.ia[1] = m;
            state.rstate.ia[2] = i;
            state.rstate.ia[3] = itcnt;
            state.rstate.ra[0] = v;
            state.rstate.ra[1] = growth;
            state.rstate.ra[2] = bestgrowth;
            return result;
        }


        /*************************************************************************
        This function estimates norm of the sparse M*N matrix A.

        INPUT PARAMETERS:
            State       -   norm estimator state, must be initialized with a  call
                            to NormEstimatorCreate()
            A           -   sparse M*N matrix, must be converted to CRS format
                            prior to calling this function.

        After this function  is  over  you can call NormEstimatorResults() to get
        estimate of the norm(A).
-- ALGLIB --
             Copyright 06.12.2011 by Bochkanov Sergey
        *************************************************************************/
        public static void normestimatorestimatesparse(normestimatorstate state,
            sparse.sparsematrix a)
        {
            normestimatorrestart(state);

            //
            // Serve every product request of the reverse-communication
            // routine until it reports completion.
            //
            while( normestimatoriteration(state) )
            {
                if( state.needmv )
                {
                    sparse.sparsemv(a, state.x, ref state.mv);
                }
                else if( state.needmtv )
                {
                    sparse.sparsemtv(a, state.x, ref state.mtv);
                }
            }
        }


        /*************************************************************************
        Matrix norm estimation results

        INPUT PARAMETERS:
            State   -   algorithm state

        OUTPUT PARAMETERS:
            Nrm     -   estimate of the matrix norm, Nrm>=0

          -- ALGLIB --
             Copyright 06.12.2011 by Bochkanov Sergey
        *************************************************************************/
        public static void normestimatorresults(normestimatorstate state,
            ref double nrm)
        {
            // Output parameter is cleared first, then filled from the state.
            nrm = 0;

            nrm = state.repnorm;
        }


        /*************************************************************************
        This  function  restarts estimator and prepares it for the next estimation
        round.

        It reallocates the saved-locals arrays of the reverse-communication state
        and resets its stage to -1; no other field of State is touched.

        INPUT PARAMETERS:
            State   -   algorithm state

          -- ALGLIB --
             Copyright 06.12.2011 by Bochkanov Sergey
        *************************************************************************/
        public static void normestimatorrestart(normestimatorstate state)
        {
            // Four integer slots and three real slots are saved between calls.
            state.rstate.ia = new int[4];
            state.rstate.ra = new double[3];
            state.rstate.stage = -1;
        }


    }
    public class matdet
    {
        /*************************************************************************
        Determinant calculation of the matrix given by its LU decomposition.

        Input parameters:
            A       -   LU decomposition of the matrix (output of
                        RMatrixLU subroutine).
            Pivots  -   table of permutations which were made during
                        the LU decomposition.
                        Output of RMatrixLU subroutine.
            N       -   (optional) size of matrix A:
                        * if given, only principal NxN submatrix is processed and
                          overwritten. other elements are unchanged.
* if not given, automatically determined from matrix size (A must be square matrix) Result: matrix determinant. -- ALGLIB -- Copyright 2005 by Bochkanov Sergey *************************************************************************/ /* NOTE(review): a large amount of text has been lost from this copy of the file starting inside rmatrixludet: its argument checks collapse into fragments such as "RMatrixLUDet: rows(A)=n," and are followed directly by assert fragments that mention RMatrixDet, CMatrixLUDet, CMatrixDet, SPDMatrixCholeskyDet, SPDMatrixDet and finally SMatrixGEVDReduce. The bodies and headers of those routines, the end of this class and the header of the routine that owns the generalized eigenproblem reduction code below are all missing. The surviving reduction code (Problem 1, Problem 2 and Problem 3 branches) appears to be intact from the "result = true;" statement onwards, but it cannot be used without its header. Restore this region from the upstream ALGLIB 3.9.0 sources. */ public static double rmatrixludet(double[,] a, int[] pivots, int n) { double result = 0; int i = 0; int s = 0; alglib.ap.assert(n>=1, "RMatrixLUDet: N<1!"); alglib.ap.assert(alglib.ap.len(pivots)>=n, "RMatrixLUDet: Pivots array is too short!"); alglib.ap.assert(alglib.ap.rows(a)>=n, "RMatrixLUDet: rows(A)=n, "RMatrixLUDet: cols(A)=1, "RMatrixDet: N<1!"); alglib.ap.assert(alglib.ap.rows(a)>=n, "RMatrixDet: rows(A)=n, "RMatrixDet: cols(A)=1, "CMatrixLUDet: N<1!"); alglib.ap.assert(alglib.ap.len(pivots)>=n, "CMatrixLUDet: Pivots array is too short!"); alglib.ap.assert(alglib.ap.rows(a)>=n, "CMatrixLUDet: rows(A)=n, "CMatrixLUDet: cols(A)=1, "CMatrixDet: N<1!"); alglib.ap.assert(alglib.ap.rows(a)>=n, "CMatrixDet: rows(A)=n, "CMatrixDet: cols(A)=1, "SPDMatrixCholeskyDet: N<1!"); alglib.ap.assert(alglib.ap.rows(a)>=n, "SPDMatrixCholeskyDet: rows(A)=n, "SPDMatrixCholeskyDet: cols(A)=1, "SPDMatrixDet: N<1!"); alglib.ap.assert(alglib.ap.rows(a)>=n, "SPDMatrixDet: rows(A)=n, "SPDMatrixDet: cols(A)0, "SMatrixGEVDReduce: N<=0!"); alglib.ap.assert((problemtype==1 || problemtype==2) || problemtype==3, "SMatrixGEVDReduce: incorrect ProblemType!"); result = true; // // Problem 1: A*x = lambda*B*x // // Reducing to: // C*y = lambda*y // C = L^(-1) * A * L^(-T) // x = L^(-T) * y // if( problemtype==1 ) { // // Factorize B in T: B = LL' // t = new double[n-1+1, n-1+1]; if( isupperb ) { for(i=0; i<=n-1; i++) { for(i_=i; i_<=n-1;i_++) { t[i_,i] = b[i,i_]; } } } else { for(i=0; i<=n-1; i++) { for(i_=0; i_<=i;i_++) { t[i,i_] = b[i,i_]; } } } if( !trfac.spdmatrixcholesky(ref t, n, false) ) { result = false; return result; } // // Invert L in T // matinv.rmatrixtrinverse(ref t, n, false, false, ref info, rep); if( info<=0 ) { result = false; return result; }
// // Build L^(-1) * A * L^(-T) in R // w1 = new double[n+1]; w2 = new double[n+1]; r = new double[n-1+1, n-1+1]; for(j=1; j<=n; j++) { // // Form w2 = A * l'(j) (here l'(j) is j-th column of L^(-T)) // i1_ = (0) - (1); for(i_=1; i_<=j;i_++) { w1[i_] = t[j-1,i_+i1_]; } sblas.symmetricmatrixvectormultiply(a, isuppera, 0, j-1, w1, 1.0, ref w2); if( isuppera ) { blas.matrixvectormultiply(a, 0, j-1, j, n-1, true, w1, 1, j, 1.0, ref w2, j+1, n, 0.0); } else { blas.matrixvectormultiply(a, j, n-1, 0, j-1, false, w1, 1, j, 1.0, ref w2, j+1, n, 0.0); } // // Form l(i)*w2 (here l(i) is i-th row of L^(-1)) // for(i=1; i<=n; i++) { i1_ = (1)-(0); v = 0.0; for(i_=0; i_<=i-1;i_++) { v += t[i-1,i_]*w2[i_+i1_]; } r[i-1,j-1] = v; } } // // Copy R to A // for(i=0; i<=n-1; i++) { for(i_=0; i_<=n-1;i_++) { a[i,i_] = r[i,i_]; } } // // Copy L^(-1) from T to R and transpose // isupperr = true; for(i=0; i<=n-1; i++) { for(j=0; j<=i-1; j++) { r[i,j] = 0; } } for(i=0; i<=n-1; i++) { for(i_=i; i_<=n-1;i_++) { r[i,i_] = t[i_,i]; } } return result; } // // Problem 2: A*B*x = lambda*x // or // problem 3: B*A*x = lambda*x // // Reducing to: // C*y = lambda*y // C = U * A * U' // B = U'* U // if( problemtype==2 || problemtype==3 ) { // // Factorize B in T: B = U'*U // t = new double[n-1+1, n-1+1]; if( isupperb ) { for(i=0; i<=n-1; i++) { for(i_=i; i_<=n-1;i_++) { t[i,i_] = b[i,i_]; } } } else { for(i=0; i<=n-1; i++) { for(i_=i; i_<=n-1;i_++) { t[i,i_] = b[i_,i]; } } } if( !trfac.spdmatrixcholesky(ref t, n, true) ) { result = false; return result; } // // Build U * A * U' in R // w1 = new double[n+1]; w2 = new double[n+1]; w3 = new double[n+1]; r = new double[n-1+1, n-1+1]; for(j=1; j<=n; j++) { // // Form w2 = A * u'(j) (here u'(j) is j-th column of U') // i1_ = (j-1) - (1); for(i_=1; i_<=n-j+1;i_++) { w1[i_] = t[j-1,i_+i1_]; } sblas.symmetricmatrixvectormultiply(a, isuppera, j-1, n-1, w1, 1.0, ref w3); i1_ = (1) - (j); for(i_=j; i_<=n;i_++) { w2[i_] = w3[i_+i1_]; } i1_ = (j-1) - (j); for(i_=j;
i_<=n;i_++) { w1[i_] = t[j-1,i_+i1_]; } if( isuppera ) { blas.matrixvectormultiply(a, 0, j-2, j-1, n-1, false, w1, j, n, 1.0, ref w2, 1, j-1, 0.0); } else { blas.matrixvectormultiply(a, j-1, n-1, 0, j-2, true, w1, j, n, 1.0, ref w2, 1, j-1, 0.0); } // // Form u(i)*w2 (here u(i) is i-th row of U) // for(i=1; i<=n; i++) { i1_ = (i)-(i-1); v = 0.0; for(i_=i-1; i_<=n-1;i_++) { v += t[i-1,i_]*w2[i_+i1_]; } r[i-1,j-1] = v; } } // // Copy R to A // for(i=0; i<=n-1; i++) { for(i_=0; i_<=n-1;i_++) { a[i,i_] = r[i,i_]; } } if( problemtype==2 ) { // // Invert U in T // matinv.rmatrixtrinverse(ref t, n, true, false, ref info, rep); if( info<=0 ) { result = false; return result; } // // Copy U^-1 from T to R // isupperr = true; for(i=0; i<=n-1; i++) { for(j=0; j<=i-1; j++) { r[i,j] = 0; } } for(i=0; i<=n-1; i++) { for(i_=i; i_<=n-1;i_++) { r[i,i_] = t[i,i_]; } } } else { // // Copy U from T to R and transpose // isupperr = false; for(i=0; i<=n-1; i++) { for(j=i+1; j<=n-1; j++) { r[i,j] = 0; } } for(i=0; i<=n-1; i++) { for(i_=i; i_<=n-1;i_++) { r[i_,i] = t[i,i_]; } } } } return result; } } public class inverseupdate { /************************************************************************* Inverse matrix update by the Sherman-Morrison formula The algorithm updates matrix A^-1 when adding a number to an element of matrix A. Input parameters: InvA - inverse of matrix A. Array whose indexes range within [0..N-1, 0..N-1]. N - size of matrix A. UpdRow - row where the element to be updated is stored. UpdColumn - column where the element to be updated is stored. UpdVal - a number to be added to the element. Output parameters: InvA - inverse of modified matrix A.
-- ALGLIB -- Copyright 2005 by Bochkanov Sergey *************************************************************************/ public static void rmatrixinvupdatesimple(ref double[,] inva, int n, int updrow, int updcolumn, double updval) { double[] t1 = new double[0]; double[] t2 = new double[0]; int i = 0; double lambdav = 0; double vt = 0; int i_ = 0; alglib.ap.assert(updrow>=0 && updrow=0 && updcolumn=0. Output parameters: A - contains matrix T. Array whose indexes range within [0..N-1, 0..N-1]. S - contains Schur vectors. Array whose indexes range within [0..N-1, 0..N-1]. Note 1: The block structure of matrix T can be easily recognized: since all the elements below the blocks are zeros, the elements a[i+1,i] which are equal to 0 show the block border. Note 2: The algorithm performance depends on the value of the internal parameter NS of the InternalSchurDecomposition subroutine which defines the number of shifts in the QR algorithm (similarly to the block width in block-matrix algorithms in linear algebra). If you require maximum performance on your machine, it is recommended to adjust this parameter manually. Result: True, if the algorithm has converged and parameters A and S contain the result. False, if the algorithm has not converged. Algorithm implemented on the basis of the DHSEQR subroutine (LAPACK 3.0 library). *************************************************************************/
        // NOTE(review): the line above is kept exactly as found. Text has been
        // lost from it: the argument checks of rmatrixinvupdatesimple run
        // straight into the tail of the documentation comment of rmatrixschur,
        // so the rest of rmatrixinvupdatesimple, any definitions that followed
        // it and the opening of that documentation comment are missing.
        // Restore them from the upstream ALGLIB 3.9.0 sources.
        //
        // rmatrixschur, as visible here:
        //   * reduces A in place to upper Hessenberg form and unpacks the
        //     orthogonal factor into S,
        //   * runs the internal Schur decomposition on A and S,
        //   * returns True exactly when that decomposition reports Info=0.
        // Any previous contents of S are discarded.
        public static bool rmatrixschur(ref double[,] a,
            int n,
            ref double[,] s)
        {
            double[] tau = new double[0];
            double[] wr = new double[0];
            double[] wi = new double[0];
            int info = 0;

            s = new double[0,0];

            //
            // Upper Hessenberg form of the 0-based matrix
            //
            ortfac.rmatrixhessenberg(ref a, n, ref tau);
            ortfac.rmatrixhessenbergunpackq(a, n, tau, ref s);

            //
            // Schur decomposition
            //
            hsschur.rmatrixinternalschurdecomposition(a, n, 1, 1, ref wr, ref wi, ref s, ref info);
            return info==0;
        }


    }
}