///
///  This file is part of ILNumerics Community Edition.
///
///  ILNumerics Community Edition - high performance computing for applications.
///  Copyright (C) 2006 - 2012 Haymo Kutschbach, http://ilnumerics.net
///
///  ILNumerics Community Edition is free software: you can redistribute it and/or modify
///  it under the terms of the GNU General Public License version 3 as published by
///  the Free Software Foundation.
///
///  ILNumerics Community Edition is distributed in the hope that it will be useful,
///  but WITHOUT ANY WARRANTY; without even the implied warranty of
///  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
///  GNU General Public License for more details.
///
///  You should have received a copy of the GNU General Public License
///  along with ILNumerics Community Edition. See the file License.txt in the root
///  of your distribution package. If not, see http://www.gnu.org/licenses/.
///
///  In addition this software uses the following components and/or licenses:
///
///  =================================================================================
///  The Open Toolkit Library License
///
///  Copyright (c) 2006 - 2009 the Open Toolkit library.
///
///  Permission is hereby granted, free of charge, to any person obtaining a copy
///  of this software and associated documentation files (the "Software"), to deal
///  in the Software without restriction, including without limitation the rights to
///  use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
///  the Software, and to permit persons to whom the Software is furnished to do
///  so, subject to the following conditions:
///
///  The above copyright notice and this permission notice shall be included in all
///  copies or substantial portions of the Software.
///
///  =================================================================================
///
using System;
using System.Collections.Generic;
using System.Text;
using ILNumerics;
using ILNumerics.Exceptions;
using ILNumerics.Storage;
using ILNumerics.Misc;

namespace ILNumerics {

    public partial class ILMath {

        /// <summary>Apply an arbitrary function to two arrays</summary>
        /// <param name="func">A function c = f(a,b), which will be applied to the elements in A and B</param>
        /// <param name="A">Input array A</param>
        /// <param name="B">Input array B</param>
        /// <returns>The combination of A and B. The result and its size depend on the inputs:
        /// <list type="table">
        /// <item><term>size(A) == size(B)</term>
        /// <description>Same size as A/B; elementwise combination of A and B.</description></item>
        /// <item><term>isscalar(A) || isscalar(B)</term>
        /// <description>Same size as A or B, whichever is not a scalar, the scalar value being applied to each element
        /// (i.e. if the non-scalar input is empty, the result is empty).</description></item>
        /// <item><term>All other cases</term>
        /// <description>If A or B is a column vector and the other parameter is an array with a matching column length,
        /// the vector is used to operate on all columns of the array. Similarly, if one parameter is a row vector,
        /// it is used to operate along the rows of the other array if its number of columns matches the vector length.</description></item>
        /// </list></returns>
        /// <remarks>The apply function is also implemented for inputs of, e.g., sizes (m x n) and (m x 1).
        /// In this case the vector argument is combined with each column, resulting in an (m x n) array.
        /// This feature is, however, officially not supported.</remarks>
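        // Minimal usage sketch (illustrative only, not part of the original source). It assumes the
        // double precision overload below and the standard ILMath creation helper counter():
        //
        //     ILArray<double> A = counter(3, 4);                  // 3 x 4 matrix, counting column major
        //     ILArray<double> v = counter(3, 1);                  // 3 x 1 column vector
        //     ILArray<double> C = apply((a, b) => a * b, A, v);   // v combined with every column of A -> 3 x 4
        //     ILArray<double> D = apply(Math.Max, A, A);          // elementwise maximum of two equally sized arrays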
        public unsafe static ILRetArray<double> apply(Func<double, double, double> func, ILInArray<double> A, ILInArray<double> B) {
            using (ILScope.Enter(A, B)) {
                int outLen;
                BinOpItMode mode;
                double[] retArr;
                double[] arrA = A.GetArrayForRead();
                double[] arrB = B.GetArrayForRead();
                ILSize outDims;
                #region determine operation mode
                if (A.IsScalar) {
                    outDims = B.Size;
                    if (B.IsScalar) {
                        return new ILRetArray<double>(new double[1] { func(A.GetValue(0), B.GetValue(0)) }, A.Size);
                    } else if (B.IsEmpty) {
                        return ILRetArray<double>.empty(outDims);
                    } else {
                        outLen = outDims.NumberOfElements;
                        if (!B.TryGetStorage4InplaceOp(out retArr)) {
                            retArr = ILMemoryPool.Pool.New<double>(outLen);
                            mode = BinOpItMode.SAN;
                        } else {
                            mode = BinOpItMode.SAI;
                        }
                    }
                } else {
                    outDims = A.Size;
                    if (B.IsScalar) {
                        if (A.IsEmpty) {
                            return ILRetArray<double>.empty(A.Size);
                        }
                        outLen = A.S.NumberOfElements;
                        if (!A.TryGetStorage4InplaceOp(out retArr)) {
                            retArr = ILMemoryPool.Pool.New<double>(outLen);
                            mode = BinOpItMode.ASN;
                        } else {
                            mode = BinOpItMode.ASI;
                        }
                    } else {
                        // array + array
                        if (!A.Size.IsSameSize(B.Size)) {
                            return applyEx(func, A, B);
                        }
                        outLen = A.S.NumberOfElements;
                        if (A.TryGetStorage4InplaceOp(out retArr))
                            mode = BinOpItMode.AAIA;
                        else if (B.TryGetStorage4InplaceOp(out retArr)) {
                            mode = BinOpItMode.AAIB;
                        } else {
                            retArr = ILMemoryPool.Pool.New<double>(outLen);
                            mode = BinOpItMode.AAN;
                        }
                    }
                }
                #endregion
                ILDenseStorage<double> retStorage = new ILDenseStorage<double>(retArr, outDims);
                int i = 0, workerCount = 1;
                Action<object> worker = data => {
                    Tuple<int, int, IntPtr, IntPtr, IntPtr, BinOpItMode> range = (Tuple<int, int, IntPtr, IntPtr, IntPtr, BinOpItMode>)data;
                    double* cLast, cp = (double*)range.Item5 + range.Item1;
                    double scalar;
                    cLast = cp + range.Item2;
                    #region loops
                    switch (mode) {
                        case BinOpItMode.AAIA:
                            double* bp = ((double*)range.Item4 + range.Item1);
                            while (cp < cLast) { *cp = func(*cp, *bp++); cp++; }
                            break;
                        case BinOpItMode.AAIB:
                            double* ap = ((double*)range.Item3 + range.Item1);
                            while (cp < cLast) { *cp = func(*ap++, *cp); cp++; }
                            //ap = ((double*)range.Item3 + range.Item1);
                            //for (int i2 = range.Item2; i2-- > 0; ) {
                            //    *(cp + i2) = *(ap + i2) - *(cp + i2);
                            //}
                            //int ie = range.Item1 + range.Item2 - 1;
                            //double[] locRetArr = retArr;
                            //for (int i2 = range.Item1; i2 < locRetArr.Length; i2++) {
                            //    locRetArr[i2] = arrA[i2] - locRetArr[i2];
                            //    if (i2 >= ie) break;
                            //}
                            break;
                        case BinOpItMode.AAN:
                            ap = ((double*)range.Item3 + range.Item1);
                            bp = ((double*)range.Item4 + range.Item1);
                            while (cp < cLast) { *cp++ = func(*ap++, *bp++); }
                            break;
                        case BinOpItMode.ASI:
                            scalar = *((double*)range.Item4);
                            while (cp < cLast) { *cp = func(*cp, scalar); cp++; }
                            break;
                        case BinOpItMode.ASN:
                            ap = ((double*)range.Item3 + range.Item1);
                            scalar = *((double*)range.Item4);
                            while (cp < cLast) { *cp++ = func(*ap++, scalar); }
                            break;
                        case BinOpItMode.SAI:
                            scalar = *((double*)range.Item3);
                            while (cp < cLast) { *cp = func(scalar, *cp); cp++; }
                            break;
                        case BinOpItMode.SAN:
                            scalar = *((double*)range.Item3);
                            bp = ((double*)range.Item4 + range.Item1);
                            while (cp < cLast) { *cp++ = func(scalar, *bp++); }
                            break;
                        default:
                            break;
                    }
                    #endregion
                    System.Threading.Interlocked.Decrement(ref workerCount);
                    //retStorage.PendingEvents.Signal();
                };
                #region do the work
                int workItemCount = Settings.s_maxNumberThreads, workItemLength;
                if (Settings.s_maxNumberThreads > 1 && outLen / 2 > Settings.s_minParallelElement1Count) {
                    if (outLen / workItemCount > Settings.s_minParallelElement1Count) {
                        workItemLength = outLen / workItemCount;
                        //workItemLength = (int)((double)outLen / workItemCount * 1.05);
                    } else {
                        workItemLength = outLen / 2;
                        workItemCount = 2;
                    }
                } else {
                    workItemLength = outLen;
                    workItemCount = 1;
                }
                // retStorage.PendingEvents = new System.Threading.CountdownEvent(workItemCount);
                fixed (double* arrAP = arrA)
                fixed (double* arrBP = arrB)
                fixed (double* retArrP = retArr) {
                    for (; i < workItemCount - 1; i++) {
                        Tuple<int, int, IntPtr, IntPtr, IntPtr, BinOpItMode> range = new Tuple<int, int, IntPtr, IntPtr, IntPtr, BinOpItMode>(
                            i * workItemLength, workItemLength, (IntPtr)arrAP, (IntPtr)arrBP, (IntPtr)retArrP, mode);
                        System.Threading.Interlocked.Increment(ref workerCount);
                        ILThreadPool.QueueUserWorkItem(i, worker, range);
                    }
                    // the last (or maybe the only) chunk is done right here
                    //System.Threading.Interlocked.Increment(ref retStorage.PendingTasks);
                    worker(new Tuple<int, int, IntPtr, IntPtr, IntPtr, BinOpItMode>(
                        i * workItemLength, outLen - i * workItemLength, (IntPtr)arrAP, (IntPtr)arrBP, (IntPtr)retArrP, mode));
                    System.Threading.SpinWait.SpinUntil(() => { return workerCount <= 0; });
                    //while (workerCount > 0) ;
                }
                #endregion
                return new ILRetArray<double>(retStorage);
            }
        }

        private static unsafe ILRetArray<double> applyEx(Func<double, double, double> applyFunc, ILInArray<double> A, ILInArray<double> B) {
            #region parameter checking
            if (isnull(A) || isnull(B))
                return empty(ILSize.Empty00);
            if (A.IsEmpty) {
                return empty(B.S);
            } else if (B.IsEmpty) {
                return empty(A.S);
            }
            //if (A.IsScalar || B.IsScalar || A.D.IsSameSize(B.D))
            //    return add(A,B);
            int dim = -1;
            for (int _L = 0; _L < Math.Max(A.S.NumberOfDimensions, B.S.NumberOfDimensions); _L++) {
                if (A.S[_L] != B.S[_L]) {
                    if (dim >= 0 || (A.S[_L] != 1 && B.S[_L] != 1)) {
                        throw new ILArgumentException("A and B must have the same size except for one singleton dimension in A or B");
                    }
                    dim = _L;
                }
            }
            if (dim > 1)
                throw new ILArgumentException("singleton dimension expansion currently is only supported for column- and row vectors");
            #endregion
            #region parameter preparation
            double[] retArr;
            double[] arrA = A.GetArrayForRead();
            double[] arrB = B.GetArrayForRead();
            ILSize outDims;
            BinOptItExMode mode;
            int arrInc = 0;
            int arrStepInc = 0;
            int dimLen = 0;
            if (A.IsVector) {
                outDims = B.S;
                if (!B.TryGetStorage4InplaceOp(out retArr)) {
                    retArr = ILMemoryPool.Pool.New<double>(outDims.NumberOfElements);
                    mode = BinOptItExMode.VAN;
                } else {
                    mode = BinOptItExMode.VAI;
                }
                dimLen = A.Length;
            } else if (B.IsVector) {
                outDims = A.S;
                if (!A.TryGetStorage4InplaceOp(out retArr)) {
                    retArr = ILMemoryPool.Pool.New<double>(outDims.NumberOfElements);
                    mode = BinOptItExMode.AVN;
                } else {
                    mode = BinOptItExMode.AVI;
                }
                dimLen = B.Length;
            } else {
                throw new ILArgumentException("A and B must have the same size except for one singleton dimension in A or B");
            }
            arrInc = (dim == 0) ?
outDims.SequentialIndexDistance(1) : outDims.SequentialIndexDistance(0); arrStepInc = outDims.SequentialIndexDistance(dim); #endregion #region worker loops definition ILDenseStorage retStorage = new ILDenseStorage(retArr, outDims); int workerCount = 1; Action worker = data => { // expects: iStart, iLen, ap, bp, cp Tuple range = (Tuple)data; double* ap; double* bp; double* cp; switch (mode) { case BinOptItExMode.VAN: for (int s = 0; s < range.Item2; s++) { ap = (double*)range.Item3; bp = (double*)range.Item4 + range.Item1 + s * arrStepInc; ; cp = (double*)range.Item5 + range.Item1 + s * arrStepInc; for (int l = 0; l < dimLen; l++) { *cp = applyFunc(*ap, *bp); ap++; bp += arrInc; cp += arrInc; } } break; case BinOptItExMode.VAI: for (int s = 0; s < range.Item2; s++) { ap = (double*)range.Item3; cp = (double*)range.Item5 + range.Item1 + s * arrStepInc; for (int l = 0; l < dimLen; l++) { *cp = applyFunc(*ap, *cp); ap++; cp += arrInc; } } break; case BinOptItExMode.AVN: for (int s = 0; s < range.Item2; s++) { ap = (double*)range.Item3 + range.Item1 + s * arrStepInc; bp = (double*)range.Item4; cp = (double*)range.Item5 + range.Item1 + s * arrStepInc; for (int l = 0; l < dimLen; l++) { *cp = applyFunc(*ap, *bp); ap += arrInc; bp++; cp += arrInc; } } break; case BinOptItExMode.AVI: for (int s = 0; s < range.Item2; s++) { bp = (double*)range.Item4; cp = (double*)range.Item5 + range.Item1 + s * arrStepInc; for (int l = 0; l < dimLen; l++) { *cp = applyFunc(*cp, *bp); bp++; cp += arrInc; } } break; } System.Threading.Interlocked.Decrement(ref workerCount); }; #endregion #region work distribution int i = 0, workItemCount = Settings.s_maxNumberThreads, workItemLength; int outLen = outDims.NumberOfElements; if (Settings.s_maxNumberThreads > 1 && outLen / 2 >= Settings.s_minParallelElement1Count) { if (outLen / workItemCount > Settings.s_minParallelElement1Count) { workItemLength = outLen / dimLen / workItemCount; //workItemLength = (int)((double)outLen / workItemCount * 1.05); } else { workItemLength = outLen / dimLen / 2; workItemCount = 2; } } else { workItemLength = outLen / dimLen; workItemCount = 1; } fixed (double* arrAP = arrA) fixed (double* arrBP = arrB) fixed (double* retArrP = retArr) { for (; i < workItemCount - 1; i++) { Tuple range = new Tuple (i * workItemLength * arrStepInc, workItemLength, (IntPtr)arrAP, (IntPtr)arrBP, (IntPtr)retArrP); System.Threading.Interlocked.Increment(ref workerCount); ILThreadPool.QueueUserWorkItem(i, worker, range); } // the last (or may the only) chunk is done right here //System.Threading.Interlocked.Increment(ref retStorage.PendingTasks); worker(new Tuple (i * workItemLength * arrStepInc, (outLen / dimLen) - i * workItemLength, (IntPtr)arrAP, (IntPtr)arrBP, (IntPtr)retArrP)); ILThreadPool.Wait4Workers(ref workerCount); } #endregion return new ILRetArray(retStorage); } #region HYCALPER AUTO GENERATED CODE /// Apply an arbitrary function to two arrays /// A function c = f(a,b), which will be applied to elements in A and B /// Input array A /// Input array B /// The combination of A and B. The result and size depends on the inputs: /// /// size(A) == size(B) /// Same size as A/B, elementwise combination of A and B. /// /// /// isscalar(A) || isscalar(B) /// Same size as A or B, whichever is not a scalar, the scalar value being applied to each element /// (i.e. if the non-scalar input is empty, the result is empty). 
/// /// /// All other cases /// If A or B is a colum vector and the other parameter is an array with a matching column length, the vector is used to operate on all columns of the array. /// Similarly, if one parameter is a row vector, it is used to operate along the rows of the other array if its number of columns matches the vector length. /// /// /// The apply function is also implemented for input if e.g. sizes (mxn) and (mx1). /// In this case the vector argument will be combined to each column, resulting in an (mxn) array. /// This feature is, however, officiallny not supported. public unsafe static ILRetArray apply(Func func, ILInArray A, ILInArray B) { using (ILScope.Enter(A, B)) { int outLen; BinOpItMode mode; float[] retArr; float[] arrA = A.GetArrayForRead(); float[] arrB = B.GetArrayForRead(); ILSize outDims; #region determine operation mode if (A.IsScalar) { outDims = B.Size; if (B.IsScalar) { return new ILRetArray(new float[1] { func(A.GetValue(0), B.GetValue(0)) }, A.Size); } else if (B.IsEmpty) { return ILRetArray.empty(outDims); } else { outLen = outDims.NumberOfElements; if (!B.TryGetStorage4InplaceOp(out retArr)) { retArr = ILMemoryPool.Pool.New(outLen); mode = BinOpItMode.SAN; } else { mode = BinOpItMode.SAI; } } } else { outDims = A.Size; if (B.IsScalar) { if (A.IsEmpty) { return ILRetArray.empty(A.Size); } outLen = A.S.NumberOfElements; if (!A.TryGetStorage4InplaceOp(out retArr)) { retArr = ILMemoryPool.Pool.New(outLen); mode = BinOpItMode.ASN; } else { mode = BinOpItMode.ASI; } } else { // array + array if (!A.Size.IsSameSize(B.Size)) { return applyEx(func,A,B); } outLen = A.S.NumberOfElements; if (A.TryGetStorage4InplaceOp(out retArr)) mode = BinOpItMode.AAIA; else if (B.TryGetStorage4InplaceOp(out retArr)) { mode = BinOpItMode.AAIB; } else { retArr = ILMemoryPool.Pool.New(outLen); mode = BinOpItMode.AAN; } } } #endregion ILDenseStorage retStorage = new ILDenseStorage(retArr, outDims); int i = 0, workerCount = 1; Action worker = data => { Tuple range = (Tuple)data; float* cLast, cp = (float*)range.Item5 + range.Item1; float scalar; cLast = cp + range.Item2; #region loops switch (mode) { case BinOpItMode.AAIA: float* bp = ((float*)range.Item4 + range.Item1); while (cp < cLast) { *cp = func(*cp, *bp++); cp++; } break; case BinOpItMode.AAIB: float* ap = ((float*)range.Item3 + range.Item1); while (cp < cLast) { *cp = func(*ap++, *cp); cp++; } //ap = ((double*)range.Item3 + range.Item1); //for (int i2 = range.Item2; i2-- > 0; ) { // *(cp + i2) = *(ap + i2) - *(cp + i2); //} //int ie = range.Item1 + range.Item2-1; //double[] locRetArr = retArr; //for (int i2 = range.Item1; i2 < locRetArr.Length; i2++) { // locRetArr[i2] = arrA[i2] - locRetArr[i2]; // if (i2 >= ie) break; //} break; case BinOpItMode.AAN: ap = ((float*)range.Item3 + range.Item1); bp = ((float*)range.Item4 + range.Item1); while (cp < cLast) { *cp++ = func(*ap++, *bp++); } break; case BinOpItMode.ASI: scalar = *((float*)range.Item4); while (cp < cLast) { *cp = func(*cp, scalar); cp++; } break; case BinOpItMode.ASN: ap = ((float*)range.Item3 + range.Item1); scalar = *((float*)range.Item4); while (cp < cLast) { *cp++ = func(*ap++, scalar); } break; case BinOpItMode.SAI: scalar = *((float*)range.Item3); while (cp < cLast) { *cp = func(scalar, *cp); cp++; } break; case BinOpItMode.SAN: scalar = *((float*)range.Item3); bp = ((float*)range.Item4 + range.Item1); while (cp < cLast) { *cp++ = func(scalar, *bp++); } break; default: break; } #endregion System.Threading.Interlocked.Decrement(ref workerCount); 
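// the decrement above marks this work item as finished; the dispatching thread spin-waits until workerCount drops back to zero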
//retStorage.PendingEvents.Signal(); }; #region do the work int workItemCount = Settings.s_maxNumberThreads, workItemLength; if (Settings.s_maxNumberThreads > 1 && outLen / 2 > Settings.s_minParallelElement1Count) { if (outLen / workItemCount > Settings.s_minParallelElement1Count) { workItemLength = outLen / workItemCount; //workItemLength = (int)((double)outLen / workItemCount * 1.05); } else { workItemLength = outLen / 2; workItemCount = 2; } } else { workItemLength = outLen; workItemCount = 1; } // retStorage.PendingEvents = new System.Threading.CountdownEvent(workItemCount); fixed ( float* arrAP = arrA) fixed ( float* arrBP = arrB) fixed ( float* retArrP = retArr) { for (; i < workItemCount - 1; i++) { Tuple range = new Tuple (i * workItemLength, workItemLength, (IntPtr)arrAP, (IntPtr)arrBP, (IntPtr)retArrP, mode); System.Threading.Interlocked.Increment(ref workerCount); ILThreadPool.QueueUserWorkItem(i, worker, range); } // the last (or may the only) chunk is done right here //System.Threading.Interlocked.Increment(ref retStorage.PendingTasks); worker(new Tuple (i * workItemLength, outLen - i * workItemLength, (IntPtr)arrAP, (IntPtr)arrBP, (IntPtr)retArrP, mode)); System.Threading.SpinWait.SpinUntil(() => { return workerCount <= 0; }); //while (workerCount > 0) ; } #endregion return new ILRetArray(retStorage); } } private static unsafe ILRetArray applyEx(Func applyFunc, ILInArray A, ILInArray B) { #region parameter checking if (isnull(A) || isnull(B)) return empty(ILSize.Empty00); if (A.IsEmpty) { return empty(B.S); } else if (B.IsEmpty) { return empty(A.S); } //if (A.IsScalar || B.IsScalar || A.D.IsSameSize(B.D)) // return add(A,B); int dim = -1; for (int _L = 0; _L < Math.Max(A.S.NumberOfDimensions, B.S.NumberOfDimensions); _L++) { if (A.S[_L] != B.S[_L]) { if (dim >= 0 || (A.S[_L] != 1 && B.S[_L] != 1)) { throw new ILArgumentException("A and B must have the same size except for one singleton dimension in A or B"); } dim = _L; } } if (dim > 1) throw new ILArgumentException("singleton dimension expansion currently is only supported for colum- and row vectors"); #endregion #region parameter preparation float[] retArr; float[] arrA = A.GetArrayForRead(); float[] arrB = B.GetArrayForRead(); ILSize outDims; BinOptItExMode mode; int arrInc = 0; int arrStepInc = 0; int dimLen = 0; if (A.IsVector) { outDims = B.S; if (!B.TryGetStorage4InplaceOp(out retArr)) { retArr = ILMemoryPool.Pool.New(outDims.NumberOfElements); mode = BinOptItExMode.VAN; } else { mode = BinOptItExMode.VAI; } dimLen = A.Length; } else if (B.IsVector) { outDims = A.S; if (!A.TryGetStorage4InplaceOp(out retArr)) { retArr = ILMemoryPool.Pool.New(outDims.NumberOfElements); mode = BinOptItExMode.AVN; } else { mode = BinOptItExMode.AVI; } dimLen = B.Length; } else { throw new ILArgumentException("A and B must have the same size except for one simgleton dimension in A or B"); } arrInc = (dim == 0) ? 
outDims.SequentialIndexDistance(1) : outDims.SequentialIndexDistance(0); arrStepInc = outDims.SequentialIndexDistance(dim); #endregion #region worker loops definition ILDenseStorage retStorage = new ILDenseStorage(retArr, outDims); int workerCount = 1; Action worker = data => { // expects: iStart, iLen, ap, bp, cp Tuple range = (Tuple)data; float* ap; float* bp; float* cp; switch (mode) { case BinOptItExMode.VAN: for (int s = 0; s < range.Item2; s++) { ap = (float*)range.Item3; bp = (float*)range.Item4 + range.Item1 + s * arrStepInc; ; cp = (float*)range.Item5 + range.Item1 + s * arrStepInc; for (int l = 0; l < dimLen; l++) { *cp = applyFunc(*ap, *bp); ap++; bp += arrInc; cp += arrInc; } } break; case BinOptItExMode.VAI: for (int s = 0; s < range.Item2; s++) { ap = (float*)range.Item3; cp = (float*)range.Item5 + range.Item1 + s * arrStepInc; for (int l = 0; l < dimLen; l++) { *cp = applyFunc(*ap, *cp); ap++; cp += arrInc; } } break; case BinOptItExMode.AVN: for (int s = 0; s < range.Item2; s++) { ap = (float*)range.Item3 + range.Item1 + s * arrStepInc; bp = (float*)range.Item4; cp = (float*)range.Item5 + range.Item1 + s * arrStepInc; for (int l = 0; l < dimLen; l++) { *cp = applyFunc(*ap, *bp); ap += arrInc; bp++; cp += arrInc; } } break; case BinOptItExMode.AVI: for (int s = 0; s < range.Item2; s++) { bp = (float*)range.Item4; cp = (float*)range.Item5 + range.Item1 + s * arrStepInc; for (int l = 0; l < dimLen; l++) { *cp = applyFunc(*cp, *bp); bp++; cp += arrInc; } } break; } System.Threading.Interlocked.Decrement(ref workerCount); }; #endregion #region work distribution int i = 0, workItemCount = Settings.s_maxNumberThreads, workItemLength; int outLen = outDims.NumberOfElements; if (Settings.s_maxNumberThreads > 1 && outLen / 2 >= Settings.s_minParallelElement1Count) { if (outLen / workItemCount > Settings.s_minParallelElement1Count) { workItemLength = outLen / dimLen / workItemCount; //workItemLength = (int)((double)outLen / workItemCount * 1.05); } else { workItemLength = outLen / dimLen / 2; workItemCount = 2; } } else { workItemLength = outLen / dimLen; workItemCount = 1; } fixed (float* arrAP = arrA) fixed (float* arrBP = arrB) fixed (float* retArrP = retArr) { for (; i < workItemCount - 1; i++) { Tuple range = new Tuple (i * workItemLength * arrStepInc, workItemLength, (IntPtr)arrAP, (IntPtr)arrBP, (IntPtr)retArrP); System.Threading.Interlocked.Increment(ref workerCount); ILThreadPool.QueueUserWorkItem(i, worker, range); } // the last (or may the only) chunk is done right here //System.Threading.Interlocked.Increment(ref retStorage.PendingTasks); worker(new Tuple (i * workItemLength * arrStepInc, (outLen / dimLen) - i * workItemLength, (IntPtr)arrAP, (IntPtr)arrBP, (IntPtr)retArrP)); ILThreadPool.Wait4Workers(ref workerCount); } #endregion return new ILRetArray(retStorage); } /// Apply an arbitrary function to two arrays /// A function c = f(a,b), which will be applied to elements in A and B /// Input array A /// Input array B /// The combination of A and B. The result and size depends on the inputs: /// /// size(A) == size(B) /// Same size as A/B, elementwise combination of A and B. /// /// /// isscalar(A) || isscalar(B) /// Same size as A or B, whichever is not a scalar, the scalar value being applied to each element /// (i.e. if the non-scalar input is empty, the result is empty). /// /// /// All other cases /// If A or B is a colum vector and the other parameter is an array with a matching column length, the vector is used to operate on all columns of the array. 
/// Similarly, if one parameter is a row vector, it is used to operate along the rows of the other array if its number of columns matches the vector length. /// /// /// The apply function is also implemented for input if e.g. sizes (mxn) and (mx1). /// In this case the vector argument will be combined to each column, resulting in an (mxn) array. /// This feature is, however, officiallny not supported. public unsafe static ILRetArray apply(Func func, ILInArray A, ILInArray B) { using (ILScope.Enter(A, B)) { int outLen; BinOpItMode mode; fcomplex[] retArr; fcomplex[] arrA = A.GetArrayForRead(); fcomplex[] arrB = B.GetArrayForRead(); ILSize outDims; #region determine operation mode if (A.IsScalar) { outDims = B.Size; if (B.IsScalar) { return new ILRetArray(new fcomplex[1] { func(A.GetValue(0), B.GetValue(0)) }, A.Size); } else if (B.IsEmpty) { return ILRetArray.empty(outDims); } else { outLen = outDims.NumberOfElements; if (!B.TryGetStorage4InplaceOp(out retArr)) { retArr = ILMemoryPool.Pool.New(outLen); mode = BinOpItMode.SAN; } else { mode = BinOpItMode.SAI; } } } else { outDims = A.Size; if (B.IsScalar) { if (A.IsEmpty) { return ILRetArray.empty(A.Size); } outLen = A.S.NumberOfElements; if (!A.TryGetStorage4InplaceOp(out retArr)) { retArr = ILMemoryPool.Pool.New(outLen); mode = BinOpItMode.ASN; } else { mode = BinOpItMode.ASI; } } else { // array + array if (!A.Size.IsSameSize(B.Size)) { return applyEx(func,A,B); } outLen = A.S.NumberOfElements; if (A.TryGetStorage4InplaceOp(out retArr)) mode = BinOpItMode.AAIA; else if (B.TryGetStorage4InplaceOp(out retArr)) { mode = BinOpItMode.AAIB; } else { retArr = ILMemoryPool.Pool.New(outLen); mode = BinOpItMode.AAN; } } } #endregion ILDenseStorage retStorage = new ILDenseStorage(retArr, outDims); int i = 0, workerCount = 1; Action worker = data => { Tuple range = (Tuple)data; fcomplex* cLast, cp = (fcomplex*)range.Item5 + range.Item1; fcomplex scalar; cLast = cp + range.Item2; #region loops switch (mode) { case BinOpItMode.AAIA: fcomplex* bp = ((fcomplex*)range.Item4 + range.Item1); while (cp < cLast) { *cp = func(*cp, *bp++); cp++; } break; case BinOpItMode.AAIB: fcomplex* ap = ((fcomplex*)range.Item3 + range.Item1); while (cp < cLast) { *cp = func(*ap++, *cp); cp++; } //ap = ((double*)range.Item3 + range.Item1); //for (int i2 = range.Item2; i2-- > 0; ) { // *(cp + i2) = *(ap + i2) - *(cp + i2); //} //int ie = range.Item1 + range.Item2-1; //double[] locRetArr = retArr; //for (int i2 = range.Item1; i2 < locRetArr.Length; i2++) { // locRetArr[i2] = arrA[i2] - locRetArr[i2]; // if (i2 >= ie) break; //} break; case BinOpItMode.AAN: ap = ((fcomplex*)range.Item3 + range.Item1); bp = ((fcomplex*)range.Item4 + range.Item1); while (cp < cLast) { *cp++ = func(*ap++, *bp++); } break; case BinOpItMode.ASI: scalar = *((fcomplex*)range.Item4); while (cp < cLast) { *cp = func(*cp, scalar); cp++; } break; case BinOpItMode.ASN: ap = ((fcomplex*)range.Item3 + range.Item1); scalar = *((fcomplex*)range.Item4); while (cp < cLast) { *cp++ = func(*ap++, scalar); } break; case BinOpItMode.SAI: scalar = *((fcomplex*)range.Item3); while (cp < cLast) { *cp = func(scalar, *cp); cp++; } break; case BinOpItMode.SAN: scalar = *((fcomplex*)range.Item3); bp = ((fcomplex*)range.Item4 + range.Item1); while (cp < cLast) { *cp++ = func(scalar, *bp++); } break; default: break; } #endregion System.Threading.Interlocked.Decrement(ref workerCount); //retStorage.PendingEvents.Signal(); }; #region do the work int workItemCount = Settings.s_maxNumberThreads, workItemLength; if 
(Settings.s_maxNumberThreads > 1 && outLen / 2 > Settings.s_minParallelElement1Count) { if (outLen / workItemCount > Settings.s_minParallelElement1Count) { workItemLength = outLen / workItemCount; //workItemLength = (int)((double)outLen / workItemCount * 1.05); } else { workItemLength = outLen / 2; workItemCount = 2; } } else { workItemLength = outLen; workItemCount = 1; } // retStorage.PendingEvents = new System.Threading.CountdownEvent(workItemCount); fixed ( fcomplex* arrAP = arrA) fixed ( fcomplex* arrBP = arrB) fixed ( fcomplex* retArrP = retArr) { for (; i < workItemCount - 1; i++) { Tuple range = new Tuple (i * workItemLength, workItemLength, (IntPtr)arrAP, (IntPtr)arrBP, (IntPtr)retArrP, mode); System.Threading.Interlocked.Increment(ref workerCount); ILThreadPool.QueueUserWorkItem(i, worker, range); } // the last (or may the only) chunk is done right here //System.Threading.Interlocked.Increment(ref retStorage.PendingTasks); worker(new Tuple (i * workItemLength, outLen - i * workItemLength, (IntPtr)arrAP, (IntPtr)arrBP, (IntPtr)retArrP, mode)); System.Threading.SpinWait.SpinUntil(() => { return workerCount <= 0; }); //while (workerCount > 0) ; } #endregion return new ILRetArray(retStorage); } } private static unsafe ILRetArray applyEx(Func applyFunc, ILInArray A, ILInArray B) { #region parameter checking if (isnull(A) || isnull(B)) return empty(ILSize.Empty00); if (A.IsEmpty) { return empty(B.S); } else if (B.IsEmpty) { return empty(A.S); } //if (A.IsScalar || B.IsScalar || A.D.IsSameSize(B.D)) // return add(A,B); int dim = -1; for (int _L = 0; _L < Math.Max(A.S.NumberOfDimensions, B.S.NumberOfDimensions); _L++) { if (A.S[_L] != B.S[_L]) { if (dim >= 0 || (A.S[_L] != 1 && B.S[_L] != 1)) { throw new ILArgumentException("A and B must have the same size except for one singleton dimension in A or B"); } dim = _L; } } if (dim > 1) throw new ILArgumentException("singleton dimension expansion currently is only supported for colum- and row vectors"); #endregion #region parameter preparation fcomplex[] retArr; fcomplex[] arrA = A.GetArrayForRead(); fcomplex[] arrB = B.GetArrayForRead(); ILSize outDims; BinOptItExMode mode; int arrInc = 0; int arrStepInc = 0; int dimLen = 0; if (A.IsVector) { outDims = B.S; if (!B.TryGetStorage4InplaceOp(out retArr)) { retArr = ILMemoryPool.Pool.New(outDims.NumberOfElements); mode = BinOptItExMode.VAN; } else { mode = BinOptItExMode.VAI; } dimLen = A.Length; } else if (B.IsVector) { outDims = A.S; if (!A.TryGetStorage4InplaceOp(out retArr)) { retArr = ILMemoryPool.Pool.New(outDims.NumberOfElements); mode = BinOptItExMode.AVN; } else { mode = BinOptItExMode.AVI; } dimLen = B.Length; } else { throw new ILArgumentException("A and B must have the same size except for one simgleton dimension in A or B"); } arrInc = (dim == 0) ? 
outDims.SequentialIndexDistance(1) : outDims.SequentialIndexDistance(0); arrStepInc = outDims.SequentialIndexDistance(dim); #endregion #region worker loops definition ILDenseStorage retStorage = new ILDenseStorage(retArr, outDims); int workerCount = 1; Action worker = data => { // expects: iStart, iLen, ap, bp, cp Tuple range = (Tuple)data; fcomplex* ap; fcomplex* bp; fcomplex* cp; switch (mode) { case BinOptItExMode.VAN: for (int s = 0; s < range.Item2; s++) { ap = (fcomplex*)range.Item3; bp = (fcomplex*)range.Item4 + range.Item1 + s * arrStepInc; ; cp = (fcomplex*)range.Item5 + range.Item1 + s * arrStepInc; for (int l = 0; l < dimLen; l++) { *cp = applyFunc(*ap, *bp); ap++; bp += arrInc; cp += arrInc; } } break; case BinOptItExMode.VAI: for (int s = 0; s < range.Item2; s++) { ap = (fcomplex*)range.Item3; cp = (fcomplex*)range.Item5 + range.Item1 + s * arrStepInc; for (int l = 0; l < dimLen; l++) { *cp = applyFunc(*ap, *cp); ap++; cp += arrInc; } } break; case BinOptItExMode.AVN: for (int s = 0; s < range.Item2; s++) { ap = (fcomplex*)range.Item3 + range.Item1 + s * arrStepInc; bp = (fcomplex*)range.Item4; cp = (fcomplex*)range.Item5 + range.Item1 + s * arrStepInc; for (int l = 0; l < dimLen; l++) { *cp = applyFunc(*ap, *bp); ap += arrInc; bp++; cp += arrInc; } } break; case BinOptItExMode.AVI: for (int s = 0; s < range.Item2; s++) { bp = (fcomplex*)range.Item4; cp = (fcomplex*)range.Item5 + range.Item1 + s * arrStepInc; for (int l = 0; l < dimLen; l++) { *cp = applyFunc(*cp, *bp); bp++; cp += arrInc; } } break; } System.Threading.Interlocked.Decrement(ref workerCount); }; #endregion #region work distribution int i = 0, workItemCount = Settings.s_maxNumberThreads, workItemLength; int outLen = outDims.NumberOfElements; if (Settings.s_maxNumberThreads > 1 && outLen / 2 >= Settings.s_minParallelElement1Count) { if (outLen / workItemCount > Settings.s_minParallelElement1Count) { workItemLength = outLen / dimLen / workItemCount; //workItemLength = (int)((double)outLen / workItemCount * 1.05); } else { workItemLength = outLen / dimLen / 2; workItemCount = 2; } } else { workItemLength = outLen / dimLen; workItemCount = 1; } fixed (fcomplex* arrAP = arrA) fixed (fcomplex* arrBP = arrB) fixed (fcomplex* retArrP = retArr) { for (; i < workItemCount - 1; i++) { Tuple range = new Tuple (i * workItemLength * arrStepInc, workItemLength, (IntPtr)arrAP, (IntPtr)arrBP, (IntPtr)retArrP); System.Threading.Interlocked.Increment(ref workerCount); ILThreadPool.QueueUserWorkItem(i, worker, range); } // the last (or may the only) chunk is done right here //System.Threading.Interlocked.Increment(ref retStorage.PendingTasks); worker(new Tuple (i * workItemLength * arrStepInc, (outLen / dimLen) - i * workItemLength, (IntPtr)arrAP, (IntPtr)arrBP, (IntPtr)retArrP)); ILThreadPool.Wait4Workers(ref workerCount); } #endregion return new ILRetArray(retStorage); } /// Apply an arbitrary function to two arrays /// A function c = f(a,b), which will be applied to elements in A and B /// Input array A /// Input array B /// The combination of A and B. The result and size depends on the inputs: /// /// size(A) == size(B) /// Same size as A/B, elementwise combination of A and B. /// /// /// isscalar(A) || isscalar(B) /// Same size as A or B, whichever is not a scalar, the scalar value being applied to each element /// (i.e. if the non-scalar input is empty, the result is empty). 
/// /// /// All other cases /// If A or B is a colum vector and the other parameter is an array with a matching column length, the vector is used to operate on all columns of the array. /// Similarly, if one parameter is a row vector, it is used to operate along the rows of the other array if its number of columns matches the vector length. /// /// /// The apply function is also implemented for input if e.g. sizes (mxn) and (mx1). /// In this case the vector argument will be combined to each column, resulting in an (mxn) array. /// This feature is, however, officiallny not supported. public unsafe static ILRetArray apply(Func func, ILInArray A, ILInArray B) { using (ILScope.Enter(A, B)) { int outLen; BinOpItMode mode; complex[] retArr; complex[] arrA = A.GetArrayForRead(); complex[] arrB = B.GetArrayForRead(); ILSize outDims; #region determine operation mode if (A.IsScalar) { outDims = B.Size; if (B.IsScalar) { return new ILRetArray(new complex[1] { func(A.GetValue(0), B.GetValue(0)) }, A.Size); } else if (B.IsEmpty) { return ILRetArray.empty(outDims); } else { outLen = outDims.NumberOfElements; if (!B.TryGetStorage4InplaceOp(out retArr)) { retArr = ILMemoryPool.Pool.New(outLen); mode = BinOpItMode.SAN; } else { mode = BinOpItMode.SAI; } } } else { outDims = A.Size; if (B.IsScalar) { if (A.IsEmpty) { return ILRetArray.empty(A.Size); } outLen = A.S.NumberOfElements; if (!A.TryGetStorage4InplaceOp(out retArr)) { retArr = ILMemoryPool.Pool.New(outLen); mode = BinOpItMode.ASN; } else { mode = BinOpItMode.ASI; } } else { // array + array if (!A.Size.IsSameSize(B.Size)) { return applyEx(func,A,B); } outLen = A.S.NumberOfElements; if (A.TryGetStorage4InplaceOp(out retArr)) mode = BinOpItMode.AAIA; else if (B.TryGetStorage4InplaceOp(out retArr)) { mode = BinOpItMode.AAIB; } else { retArr = ILMemoryPool.Pool.New(outLen); mode = BinOpItMode.AAN; } } } #endregion ILDenseStorage retStorage = new ILDenseStorage(retArr, outDims); int i = 0, workerCount = 1; Action worker = data => { Tuple range = (Tuple)data; complex* cLast, cp = (complex*)range.Item5 + range.Item1; complex scalar; cLast = cp + range.Item2; #region loops switch (mode) { case BinOpItMode.AAIA: complex* bp = ((complex*)range.Item4 + range.Item1); while (cp < cLast) { *cp = func(*cp, *bp++); cp++; } break; case BinOpItMode.AAIB: complex* ap = ((complex*)range.Item3 + range.Item1); while (cp < cLast) { *cp = func(*ap++, *cp); cp++; } //ap = ((double*)range.Item3 + range.Item1); //for (int i2 = range.Item2; i2-- > 0; ) { // *(cp + i2) = *(ap + i2) - *(cp + i2); //} //int ie = range.Item1 + range.Item2-1; //double[] locRetArr = retArr; //for (int i2 = range.Item1; i2 < locRetArr.Length; i2++) { // locRetArr[i2] = arrA[i2] - locRetArr[i2]; // if (i2 >= ie) break; //} break; case BinOpItMode.AAN: ap = ((complex*)range.Item3 + range.Item1); bp = ((complex*)range.Item4 + range.Item1); while (cp < cLast) { *cp++ = func(*ap++, *bp++); } break; case BinOpItMode.ASI: scalar = *((complex*)range.Item4); while (cp < cLast) { *cp = func(*cp, scalar); cp++; } break; case BinOpItMode.ASN: ap = ((complex*)range.Item3 + range.Item1); scalar = *((complex*)range.Item4); while (cp < cLast) { *cp++ = func(*ap++, scalar); } break; case BinOpItMode.SAI: scalar = *((complex*)range.Item3); while (cp < cLast) { *cp = func(scalar, *cp); cp++; } break; case BinOpItMode.SAN: scalar = *((complex*)range.Item3); bp = ((complex*)range.Item4 + range.Item1); while (cp < cLast) { *cp++ = func(scalar, *bp++); } break; default: break; } #endregion 
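// each case above walks one contiguous chunk [Item1, Item1 + Item2) of the output buffer with raw pointers; the decrement below signals that this chunk is done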
System.Threading.Interlocked.Decrement(ref workerCount); //retStorage.PendingEvents.Signal(); }; #region do the work int workItemCount = Settings.s_maxNumberThreads, workItemLength; if (Settings.s_maxNumberThreads > 1 && outLen / 2 > Settings.s_minParallelElement1Count) { if (outLen / workItemCount > Settings.s_minParallelElement1Count) { workItemLength = outLen / workItemCount; //workItemLength = (int)((double)outLen / workItemCount * 1.05); } else { workItemLength = outLen / 2; workItemCount = 2; } } else { workItemLength = outLen; workItemCount = 1; } // retStorage.PendingEvents = new System.Threading.CountdownEvent(workItemCount); fixed ( complex* arrAP = arrA) fixed ( complex* arrBP = arrB) fixed ( complex* retArrP = retArr) { for (; i < workItemCount - 1; i++) { Tuple range = new Tuple (i * workItemLength, workItemLength, (IntPtr)arrAP, (IntPtr)arrBP, (IntPtr)retArrP, mode); System.Threading.Interlocked.Increment(ref workerCount); ILThreadPool.QueueUserWorkItem(i, worker, range); } // the last (or may the only) chunk is done right here //System.Threading.Interlocked.Increment(ref retStorage.PendingTasks); worker(new Tuple (i * workItemLength, outLen - i * workItemLength, (IntPtr)arrAP, (IntPtr)arrBP, (IntPtr)retArrP, mode)); System.Threading.SpinWait.SpinUntil(() => { return workerCount <= 0; }); //while (workerCount > 0) ; } #endregion return new ILRetArray(retStorage); } } private static unsafe ILRetArray applyEx(Func applyFunc, ILInArray A, ILInArray B) { #region parameter checking if (isnull(A) || isnull(B)) return empty(ILSize.Empty00); if (A.IsEmpty) { return empty(B.S); } else if (B.IsEmpty) { return empty(A.S); } //if (A.IsScalar || B.IsScalar || A.D.IsSameSize(B.D)) // return add(A,B); int dim = -1; for (int _L = 0; _L < Math.Max(A.S.NumberOfDimensions, B.S.NumberOfDimensions); _L++) { if (A.S[_L] != B.S[_L]) { if (dim >= 0 || (A.S[_L] != 1 && B.S[_L] != 1)) { throw new ILArgumentException("A and B must have the same size except for one singleton dimension in A or B"); } dim = _L; } } if (dim > 1) throw new ILArgumentException("singleton dimension expansion currently is only supported for colum- and row vectors"); #endregion #region parameter preparation complex[] retArr; complex[] arrA = A.GetArrayForRead(); complex[] arrB = B.GetArrayForRead(); ILSize outDims; BinOptItExMode mode; int arrInc = 0; int arrStepInc = 0; int dimLen = 0; if (A.IsVector) { outDims = B.S; if (!B.TryGetStorage4InplaceOp(out retArr)) { retArr = ILMemoryPool.Pool.New(outDims.NumberOfElements); mode = BinOptItExMode.VAN; } else { mode = BinOptItExMode.VAI; } dimLen = A.Length; } else if (B.IsVector) { outDims = A.S; if (!A.TryGetStorage4InplaceOp(out retArr)) { retArr = ILMemoryPool.Pool.New(outDims.NumberOfElements); mode = BinOptItExMode.AVN; } else { mode = BinOptItExMode.AVI; } dimLen = B.Length; } else { throw new ILArgumentException("A and B must have the same size except for one simgleton dimension in A or B"); } arrInc = (dim == 0) ? 
outDims.SequentialIndexDistance(1) : outDims.SequentialIndexDistance(0); arrStepInc = outDims.SequentialIndexDistance(dim); #endregion #region worker loops definition ILDenseStorage retStorage = new ILDenseStorage(retArr, outDims); int workerCount = 1; Action worker = data => { // expects: iStart, iLen, ap, bp, cp Tuple range = (Tuple)data; complex* ap; complex* bp; complex* cp; switch (mode) { case BinOptItExMode.VAN: for (int s = 0; s < range.Item2; s++) { ap = (complex*)range.Item3; bp = (complex*)range.Item4 + range.Item1 + s * arrStepInc; ; cp = (complex*)range.Item5 + range.Item1 + s * arrStepInc; for (int l = 0; l < dimLen; l++) { *cp = applyFunc(*ap, *bp); ap++; bp += arrInc; cp += arrInc; } } break; case BinOptItExMode.VAI: for (int s = 0; s < range.Item2; s++) { ap = (complex*)range.Item3; cp = (complex*)range.Item5 + range.Item1 + s * arrStepInc; for (int l = 0; l < dimLen; l++) { *cp = applyFunc(*ap, *cp); ap++; cp += arrInc; } } break; case BinOptItExMode.AVN: for (int s = 0; s < range.Item2; s++) { ap = (complex*)range.Item3 + range.Item1 + s * arrStepInc; bp = (complex*)range.Item4; cp = (complex*)range.Item5 + range.Item1 + s * arrStepInc; for (int l = 0; l < dimLen; l++) { *cp = applyFunc(*ap, *bp); ap += arrInc; bp++; cp += arrInc; } } break; case BinOptItExMode.AVI: for (int s = 0; s < range.Item2; s++) { bp = (complex*)range.Item4; cp = (complex*)range.Item5 + range.Item1 + s * arrStepInc; for (int l = 0; l < dimLen; l++) { *cp = applyFunc(*cp, *bp); bp++; cp += arrInc; } } break; } System.Threading.Interlocked.Decrement(ref workerCount); }; #endregion #region work distribution int i = 0, workItemCount = Settings.s_maxNumberThreads, workItemLength; int outLen = outDims.NumberOfElements; if (Settings.s_maxNumberThreads > 1 && outLen / 2 >= Settings.s_minParallelElement1Count) { if (outLen / workItemCount > Settings.s_minParallelElement1Count) { workItemLength = outLen / dimLen / workItemCount; //workItemLength = (int)((double)outLen / workItemCount * 1.05); } else { workItemLength = outLen / dimLen / 2; workItemCount = 2; } } else { workItemLength = outLen / dimLen; workItemCount = 1; } fixed (complex* arrAP = arrA) fixed (complex* arrBP = arrB) fixed (complex* retArrP = retArr) { for (; i < workItemCount - 1; i++) { Tuple range = new Tuple (i * workItemLength * arrStepInc, workItemLength, (IntPtr)arrAP, (IntPtr)arrBP, (IntPtr)retArrP); System.Threading.Interlocked.Increment(ref workerCount); ILThreadPool.QueueUserWorkItem(i, worker, range); } // the last (or may the only) chunk is done right here //System.Threading.Interlocked.Increment(ref retStorage.PendingTasks); worker(new Tuple (i * workItemLength * arrStepInc, (outLen / dimLen) - i * workItemLength, (IntPtr)arrAP, (IntPtr)arrBP, (IntPtr)retArrP)); ILThreadPool.Wait4Workers(ref workerCount); } #endregion return new ILRetArray(retStorage); } /// Apply an arbitrary function to two arrays /// A function c = f(a,b), which will be applied to elements in A and B /// Input array A /// Input array B /// The combination of A and B. The result and size depends on the inputs: /// /// size(A) == size(B) /// Same size as A/B, elementwise combination of A and B. /// /// /// isscalar(A) || isscalar(B) /// Same size as A or B, whichever is not a scalar, the scalar value being applied to each element /// (i.e. if the non-scalar input is empty, the result is empty). 
/// /// /// All other cases /// If A or B is a colum vector and the other parameter is an array with a matching column length, the vector is used to operate on all columns of the array. /// Similarly, if one parameter is a row vector, it is used to operate along the rows of the other array if its number of columns matches the vector length. /// /// /// The apply function is also implemented for input if e.g. sizes (mxn) and (mx1). /// In this case the vector argument will be combined to each column, resulting in an (mxn) array. /// This feature is, however, officiallny not supported. public unsafe static ILRetArray apply(Func func, ILInArray A, ILInArray B) { using (ILScope.Enter(A, B)) { int outLen; BinOpItMode mode; Int64[] retArr; Int64[] arrA = A.GetArrayForRead(); Int64[] arrB = B.GetArrayForRead(); ILSize outDims; #region determine operation mode if (A.IsScalar) { outDims = B.Size; if (B.IsScalar) { return new ILRetArray(new Int64[1] { func(A.GetValue(0), B.GetValue(0)) }, A.Size); } else if (B.IsEmpty) { return ILRetArray.empty(outDims); } else { outLen = outDims.NumberOfElements; if (!B.TryGetStorage4InplaceOp(out retArr)) { retArr = ILMemoryPool.Pool.New(outLen); mode = BinOpItMode.SAN; } else { mode = BinOpItMode.SAI; } } } else { outDims = A.Size; if (B.IsScalar) { if (A.IsEmpty) { return ILRetArray.empty(A.Size); } outLen = A.S.NumberOfElements; if (!A.TryGetStorage4InplaceOp(out retArr)) { retArr = ILMemoryPool.Pool.New(outLen); mode = BinOpItMode.ASN; } else { mode = BinOpItMode.ASI; } } else { // array + array if (!A.Size.IsSameSize(B.Size)) { return applyEx(func,A,B); } outLen = A.S.NumberOfElements; if (A.TryGetStorage4InplaceOp(out retArr)) mode = BinOpItMode.AAIA; else if (B.TryGetStorage4InplaceOp(out retArr)) { mode = BinOpItMode.AAIB; } else { retArr = ILMemoryPool.Pool.New(outLen); mode = BinOpItMode.AAN; } } } #endregion ILDenseStorage retStorage = new ILDenseStorage(retArr, outDims); int i = 0, workerCount = 1; Action worker = data => { Tuple range = (Tuple)data; Int64* cLast, cp = (Int64*)range.Item5 + range.Item1; Int64 scalar; cLast = cp + range.Item2; #region loops switch (mode) { case BinOpItMode.AAIA: Int64* bp = ((Int64*)range.Item4 + range.Item1); while (cp < cLast) { *cp = func(*cp, *bp++); cp++; } break; case BinOpItMode.AAIB: Int64* ap = ((Int64*)range.Item3 + range.Item1); while (cp < cLast) { *cp = func(*ap++, *cp); cp++; } //ap = ((double*)range.Item3 + range.Item1); //for (int i2 = range.Item2; i2-- > 0; ) { // *(cp + i2) = *(ap + i2) - *(cp + i2); //} //int ie = range.Item1 + range.Item2-1; //double[] locRetArr = retArr; //for (int i2 = range.Item1; i2 < locRetArr.Length; i2++) { // locRetArr[i2] = arrA[i2] - locRetArr[i2]; // if (i2 >= ie) break; //} break; case BinOpItMode.AAN: ap = ((Int64*)range.Item3 + range.Item1); bp = ((Int64*)range.Item4 + range.Item1); while (cp < cLast) { *cp++ = func(*ap++, *bp++); } break; case BinOpItMode.ASI: scalar = *((Int64*)range.Item4); while (cp < cLast) { *cp = func(*cp, scalar); cp++; } break; case BinOpItMode.ASN: ap = ((Int64*)range.Item3 + range.Item1); scalar = *((Int64*)range.Item4); while (cp < cLast) { *cp++ = func(*ap++, scalar); } break; case BinOpItMode.SAI: scalar = *((Int64*)range.Item3); while (cp < cLast) { *cp = func(scalar, *cp); cp++; } break; case BinOpItMode.SAN: scalar = *((Int64*)range.Item3); bp = ((Int64*)range.Item4 + range.Item1); while (cp < cLast) { *cp++ = func(scalar, *bp++); } break; default: break; } #endregion System.Threading.Interlocked.Decrement(ref workerCount); 
//retStorage.PendingEvents.Signal(); }; #region do the work int workItemCount = Settings.s_maxNumberThreads, workItemLength; if (Settings.s_maxNumberThreads > 1 && outLen / 2 > Settings.s_minParallelElement1Count) { if (outLen / workItemCount > Settings.s_minParallelElement1Count) { workItemLength = outLen / workItemCount; //workItemLength = (int)((double)outLen / workItemCount * 1.05); } else { workItemLength = outLen / 2; workItemCount = 2; } } else { workItemLength = outLen; workItemCount = 1; } // retStorage.PendingEvents = new System.Threading.CountdownEvent(workItemCount); fixed ( Int64* arrAP = arrA) fixed ( Int64* arrBP = arrB) fixed ( Int64* retArrP = retArr) { for (; i < workItemCount - 1; i++) { Tuple range = new Tuple (i * workItemLength, workItemLength, (IntPtr)arrAP, (IntPtr)arrBP, (IntPtr)retArrP, mode); System.Threading.Interlocked.Increment(ref workerCount); ILThreadPool.QueueUserWorkItem(i, worker, range); } // the last (or may the only) chunk is done right here //System.Threading.Interlocked.Increment(ref retStorage.PendingTasks); worker(new Tuple (i * workItemLength, outLen - i * workItemLength, (IntPtr)arrAP, (IntPtr)arrBP, (IntPtr)retArrP, mode)); System.Threading.SpinWait.SpinUntil(() => { return workerCount <= 0; }); //while (workerCount > 0) ; } #endregion return new ILRetArray(retStorage); } } private static unsafe ILRetArray applyEx(Func applyFunc, ILInArray A, ILInArray B) { #region parameter checking if (isnull(A) || isnull(B)) return empty(ILSize.Empty00); if (A.IsEmpty) { return empty(B.S); } else if (B.IsEmpty) { return empty(A.S); } //if (A.IsScalar || B.IsScalar || A.D.IsSameSize(B.D)) // return add(A,B); int dim = -1; for (int _L = 0; _L < Math.Max(A.S.NumberOfDimensions, B.S.NumberOfDimensions); _L++) { if (A.S[_L] != B.S[_L]) { if (dim >= 0 || (A.S[_L] != 1 && B.S[_L] != 1)) { throw new ILArgumentException("A and B must have the same size except for one singleton dimension in A or B"); } dim = _L; } } if (dim > 1) throw new ILArgumentException("singleton dimension expansion currently is only supported for colum- and row vectors"); #endregion #region parameter preparation Int64[] retArr; Int64[] arrA = A.GetArrayForRead(); Int64[] arrB = B.GetArrayForRead(); ILSize outDims; BinOptItExMode mode; int arrInc = 0; int arrStepInc = 0; int dimLen = 0; if (A.IsVector) { outDims = B.S; if (!B.TryGetStorage4InplaceOp(out retArr)) { retArr = ILMemoryPool.Pool.New(outDims.NumberOfElements); mode = BinOptItExMode.VAN; } else { mode = BinOptItExMode.VAI; } dimLen = A.Length; } else if (B.IsVector) { outDims = A.S; if (!A.TryGetStorage4InplaceOp(out retArr)) { retArr = ILMemoryPool.Pool.New(outDims.NumberOfElements); mode = BinOptItExMode.AVN; } else { mode = BinOptItExMode.AVI; } dimLen = B.Length; } else { throw new ILArgumentException("A and B must have the same size except for one simgleton dimension in A or B"); } arrInc = (dim == 0) ? 
outDims.SequentialIndexDistance(1) : outDims.SequentialIndexDistance(0); arrStepInc = outDims.SequentialIndexDistance(dim); #endregion #region worker loops definition ILDenseStorage retStorage = new ILDenseStorage(retArr, outDims); int workerCount = 1; Action worker = data => { // expects: iStart, iLen, ap, bp, cp Tuple range = (Tuple)data; Int64* ap; Int64* bp; Int64* cp; switch (mode) { case BinOptItExMode.VAN: for (int s = 0; s < range.Item2; s++) { ap = (Int64*)range.Item3; bp = (Int64*)range.Item4 + range.Item1 + s * arrStepInc; ; cp = (Int64*)range.Item5 + range.Item1 + s * arrStepInc; for (int l = 0; l < dimLen; l++) { *cp = applyFunc(*ap, *bp); ap++; bp += arrInc; cp += arrInc; } } break; case BinOptItExMode.VAI: for (int s = 0; s < range.Item2; s++) { ap = (Int64*)range.Item3; cp = (Int64*)range.Item5 + range.Item1 + s * arrStepInc; for (int l = 0; l < dimLen; l++) { *cp = applyFunc(*ap, *cp); ap++; cp += arrInc; } } break; case BinOptItExMode.AVN: for (int s = 0; s < range.Item2; s++) { ap = (Int64*)range.Item3 + range.Item1 + s * arrStepInc; bp = (Int64*)range.Item4; cp = (Int64*)range.Item5 + range.Item1 + s * arrStepInc; for (int l = 0; l < dimLen; l++) { *cp = applyFunc(*ap, *bp); ap += arrInc; bp++; cp += arrInc; } } break; case BinOptItExMode.AVI: for (int s = 0; s < range.Item2; s++) { bp = (Int64*)range.Item4; cp = (Int64*)range.Item5 + range.Item1 + s * arrStepInc; for (int l = 0; l < dimLen; l++) { *cp = applyFunc(*cp, *bp); bp++; cp += arrInc; } } break; } System.Threading.Interlocked.Decrement(ref workerCount); }; #endregion #region work distribution int i = 0, workItemCount = Settings.s_maxNumberThreads, workItemLength; int outLen = outDims.NumberOfElements; if (Settings.s_maxNumberThreads > 1 && outLen / 2 >= Settings.s_minParallelElement1Count) { if (outLen / workItemCount > Settings.s_minParallelElement1Count) { workItemLength = outLen / dimLen / workItemCount; //workItemLength = (int)((double)outLen / workItemCount * 1.05); } else { workItemLength = outLen / dimLen / 2; workItemCount = 2; } } else { workItemLength = outLen / dimLen; workItemCount = 1; } fixed (Int64* arrAP = arrA) fixed (Int64* arrBP = arrB) fixed (Int64* retArrP = retArr) { for (; i < workItemCount - 1; i++) { Tuple range = new Tuple (i * workItemLength * arrStepInc, workItemLength, (IntPtr)arrAP, (IntPtr)arrBP, (IntPtr)retArrP); System.Threading.Interlocked.Increment(ref workerCount); ILThreadPool.QueueUserWorkItem(i, worker, range); } // the last (or may the only) chunk is done right here //System.Threading.Interlocked.Increment(ref retStorage.PendingTasks); worker(new Tuple (i * workItemLength * arrStepInc, (outLen / dimLen) - i * workItemLength, (IntPtr)arrAP, (IntPtr)arrBP, (IntPtr)retArrP)); ILThreadPool.Wait4Workers(ref workerCount); } #endregion return new ILRetArray(retStorage); } /// Apply an arbitrary function to two arrays /// A function c = f(a,b), which will be applied to elements in A and B /// Input array A /// Input array B /// The combination of A and B. The result and size depends on the inputs: /// /// size(A) == size(B) /// Same size as A/B, elementwise combination of A and B. /// /// /// isscalar(A) || isscalar(B) /// Same size as A or B, whichever is not a scalar, the scalar value being applied to each element /// (i.e. if the non-scalar input is empty, the result is empty). /// /// /// All other cases /// If A or B is a colum vector and the other parameter is an array with a matching column length, the vector is used to operate on all columns of the array. 
        /// Similarly, if one parameter is a row vector, it is used to operate along the rows of the other array if its number of columns matches the vector length.
        ///
        /// The apply function is also implemented for inputs of e.g. sizes (mxn) and (mx1).
        /// In this case the vector argument will be combined with each column, resulting in an (mxn) array.
        /// This feature is, however, officially not supported.
        public unsafe static ILRetArray<Int32> apply(Func<Int32, Int32, Int32> func, ILInArray<Int32> A, ILInArray<Int32> B) {
            using (ILScope.Enter(A, B)) {
                int outLen;
                BinOpItMode mode;
                Int32[] retArr;
                Int32[] arrA = A.GetArrayForRead();
                Int32[] arrB = B.GetArrayForRead();
                ILSize outDims;
                #region determine operation mode
                // BinOpItMode encodes the operand kinds (S = scalar, A = array) and the storage strategy:
                // trailing I = result reuses an input's storage in place, N = new array from the memory pool.
                if (A.IsScalar) {
                    outDims = B.Size;
                    if (B.IsScalar) {
                        return new ILRetArray<Int32>(new Int32[1] { func(A.GetValue(0), B.GetValue(0)) }, A.Size);
                    } else if (B.IsEmpty) {
                        return ILRetArray<Int32>.empty(outDims);
                    } else {
                        outLen = outDims.NumberOfElements;
                        if (!B.TryGetStorage4InplaceOp(out retArr)) {
                            retArr = ILMemoryPool.Pool.New<Int32>(outLen);
                            mode = BinOpItMode.SAN;
                        } else {
                            mode = BinOpItMode.SAI;
                        }
                    }
                } else {
                    outDims = A.Size;
                    if (B.IsScalar) {
                        if (A.IsEmpty) {
                            return ILRetArray<Int32>.empty(A.Size);
                        }
                        outLen = A.S.NumberOfElements;
                        if (!A.TryGetStorage4InplaceOp(out retArr)) {
                            retArr = ILMemoryPool.Pool.New<Int32>(outLen);
                            mode = BinOpItMode.ASN;
                        } else {
                            mode = BinOpItMode.ASI;
                        }
                    } else {
                        // array + array
                        if (!A.Size.IsSameSize(B.Size)) {
                            return applyEx(func, A, B);
                        }
                        outLen = A.S.NumberOfElements;
                        if (A.TryGetStorage4InplaceOp(out retArr))
                            mode = BinOpItMode.AAIA;
                        else if (B.TryGetStorage4InplaceOp(out retArr)) {
                            mode = BinOpItMode.AAIB;
                        } else {
                            retArr = ILMemoryPool.Pool.New<Int32>(outLen);
                            mode = BinOpItMode.AAN;
                        }
                    }
                }
                #endregion
                ILDenseStorage<Int32> retStorage = new ILDenseStorage<Int32>(retArr, outDims);
                int i = 0, workerCount = 1;
                Action<object> worker = data => {
                    Tuple<int, int, IntPtr, IntPtr, IntPtr, BinOpItMode> range = (Tuple<int, int, IntPtr, IntPtr, IntPtr, BinOpItMode>)data;
                    Int32* cLast, cp = (Int32*)range.Item5 + range.Item1;
                    Int32 scalar;
                    cLast = cp + range.Item2;
                    #region loops
                    switch (mode) {
                        case BinOpItMode.AAIA:
                            Int32* bp = ((Int32*)range.Item4 + range.Item1);
                            while (cp < cLast) {
                                *cp = func(*cp, *bp++);
                                cp++;
                            }
                            break;
                        case BinOpItMode.AAIB:
                            Int32* ap = ((Int32*)range.Item3 + range.Item1);
                            while (cp < cLast) {
                                *cp = func(*ap++, *cp);
                                cp++;
                            }
                            //ap = ((double*)range.Item3 + range.Item1);
                            //for (int i2 = range.Item2; i2-- > 0; ) {
                            //    *(cp + i2) = *(ap + i2) - *(cp + i2);
                            //}
                            //int ie = range.Item1 + range.Item2 - 1;
                            //double[] locRetArr = retArr;
                            //for (int i2 = range.Item1; i2 < locRetArr.Length; i2++) {
                            //    locRetArr[i2] = arrA[i2] - locRetArr[i2];
                            //    if (i2 >= ie) break;
                            //}
                            break;
                        case BinOpItMode.AAN:
                            ap = ((Int32*)range.Item3 + range.Item1);
                            bp = ((Int32*)range.Item4 + range.Item1);
                            while (cp < cLast) {
                                *cp++ = func(*ap++, *bp++);
                            }
                            break;
                        case BinOpItMode.ASI:
                            scalar = *((Int32*)range.Item4);
                            while (cp < cLast) {
                                *cp = func(*cp, scalar);
                                cp++;
                            }
                            break;
                        case BinOpItMode.ASN:
                            ap = ((Int32*)range.Item3 + range.Item1);
                            scalar = *((Int32*)range.Item4);
                            while (cp < cLast) {
                                *cp++ = func(*ap++, scalar);
                            }
                            break;
                        case BinOpItMode.SAI:
                            scalar = *((Int32*)range.Item3);
                            while (cp < cLast) {
                                *cp = func(scalar, *cp);
                                cp++;
                            }
                            break;
                        case BinOpItMode.SAN:
                            scalar = *((Int32*)range.Item3);
                            bp = ((Int32*)range.Item4 + range.Item1);
                            while (cp < cLast) {
                                *cp++ = func(scalar, *bp++);
                            }
                            break;
                        default:
                            break;
                    }
                    #endregion
                    System.Threading.Interlocked.Decrement(ref workerCount);
                    //retStorage.PendingEvents.Signal();
                };
                #region do the work
                // Split the output into roughly equal chunks for the thread pool; the last chunk is
                // processed on the calling thread below.
                int workItemCount = Settings.s_maxNumberThreads, workItemLength;
                if (Settings.s_maxNumberThreads > 1 && outLen / 2 > Settings.s_minParallelElement1Count) {
                    if (outLen / workItemCount > Settings.s_minParallelElement1Count) {
                        workItemLength = outLen / workItemCount;
                        //workItemLength = (int)((double)outLen / workItemCount * 1.05);
                    } else {
                        workItemLength = outLen / 2;
                        workItemCount = 2;
                    }
                } else {
                    workItemLength = outLen;
                    workItemCount = 1;
                }
                // retStorage.PendingEvents = new System.Threading.CountdownEvent(workItemCount);
                fixed (Int32* arrAP = arrA)
                fixed (Int32* arrBP = arrB)
                fixed (Int32* retArrP = retArr) {
                    for (; i < workItemCount - 1; i++) {
                        Tuple<int, int, IntPtr, IntPtr, IntPtr, BinOpItMode> range = new Tuple<int, int, IntPtr, IntPtr, IntPtr, BinOpItMode>(
                            i * workItemLength, workItemLength, (IntPtr)arrAP, (IntPtr)arrBP, (IntPtr)retArrP, mode);
                        System.Threading.Interlocked.Increment(ref workerCount);
                        ILThreadPool.QueueUserWorkItem(i, worker, range);
                    }
                    // the last (or maybe the only) chunk is done right here
                    //System.Threading.Interlocked.Increment(ref retStorage.PendingTasks);
                    worker(new Tuple<int, int, IntPtr, IntPtr, IntPtr, BinOpItMode>(
                        i * workItemLength, outLen - i * workItemLength, (IntPtr)arrAP, (IntPtr)arrBP, (IntPtr)retArrP, mode));
                    System.Threading.SpinWait.SpinUntil(() => { return workerCount <= 0; });
                    //while (workerCount > 0) ;
                }
                #endregion
                return new ILRetArray<Int32>(retStorage);
            }
        }

        private static unsafe ILRetArray<Int32> applyEx(Func<Int32, Int32, Int32> applyFunc, ILInArray<Int32> A, ILInArray<Int32> B) {
            #region parameter checking
            if (isnull(A) || isnull(B))
                return empty<Int32>(ILSize.Empty00);
            if (A.IsEmpty) {
                return empty<Int32>(B.S);
            } else if (B.IsEmpty) {
                return empty<Int32>(A.S);
            }
            //if (A.IsScalar || B.IsScalar || A.D.IsSameSize(B.D))
            //    return add(A,B);
            // exactly one dimension may differ between A and B, and it must be a singleton in one of them
            int dim = -1;
            for (int _L = 0; _L < Math.Max(A.S.NumberOfDimensions, B.S.NumberOfDimensions); _L++) {
                if (A.S[_L] != B.S[_L]) {
                    if (dim >= 0 || (A.S[_L] != 1 && B.S[_L] != 1)) {
                        throw new ILArgumentException("A and B must have the same size except for one singleton dimension in A or B");
                    }
                    dim = _L;
                }
            }
            if (dim > 1)
                throw new ILArgumentException("singleton dimension expansion currently is only supported for column- and row vectors");
            #endregion
            #region parameter preparation
            Int32[] retArr;
            Int32[] arrA = A.GetArrayForRead();
            Int32[] arrB = B.GetArrayForRead();
            ILSize outDims;
            BinOptItExMode mode;
            int arrInc = 0;
            int arrStepInc = 0;
            int dimLen = 0;
            if (A.IsVector) {
                outDims = B.S;
                if (!B.TryGetStorage4InplaceOp(out retArr)) {
                    retArr = ILMemoryPool.Pool.New<Int32>(outDims.NumberOfElements);
                    mode = BinOptItExMode.VAN;
                } else {
                    mode = BinOptItExMode.VAI;
                }
                dimLen = A.Length;
            } else if (B.IsVector) {
                outDims = A.S;
                if (!A.TryGetStorage4InplaceOp(out retArr)) {
                    retArr = ILMemoryPool.Pool.New<Int32>(outDims.NumberOfElements);
                    mode = BinOptItExMode.AVN;
                } else {
                    mode = BinOptItExMode.AVI;
                }
                dimLen = B.Length;
            } else {
                throw new ILArgumentException("A and B must have the same size except for one singleton dimension in A or B");
            }
            arrInc = (dim == 0) ? outDims.SequentialIndexDistance(1) : outDims.SequentialIndexDistance(0);
            arrStepInc = outDims.SequentialIndexDistance(dim);
            #endregion
            #region worker loops definition
            ILDenseStorage<Int32> retStorage = new ILDenseStorage<Int32>(retArr, outDims);
            int workerCount = 1;
            Action<object> worker = data => {
                // expects: iStart, iLen, ap, bp, cp
                Tuple<int, int, IntPtr, IntPtr, IntPtr> range = (Tuple<int, int, IntPtr, IntPtr, IntPtr>)data;
                Int32* ap;
                Int32* bp;
                Int32* cp;
                switch (mode) {
                    case BinOptItExMode.VAN:
                        for (int s = 0; s < range.Item2; s++) {
                            ap = (Int32*)range.Item3;
                            bp = (Int32*)range.Item4 + range.Item1 + s * arrStepInc;
                            cp = (Int32*)range.Item5 + range.Item1 + s * arrStepInc;
                            for (int l = 0; l < dimLen; l++) {
                                *cp = applyFunc(*ap, *bp);
                                ap++;
                                bp += arrInc;
                                cp += arrInc;
                            }
                        }
                        break;
                    case BinOptItExMode.VAI:
                        for (int s = 0; s < range.Item2; s++) {
                            ap = (Int32*)range.Item3;
                            cp = (Int32*)range.Item5 + range.Item1 + s * arrStepInc;
                            for (int l = 0; l < dimLen; l++) {
                                *cp = applyFunc(*ap, *cp);
                                ap++;
                                cp += arrInc;
                            }
                        }
                        break;
                    case BinOptItExMode.AVN:
                        for (int s = 0; s < range.Item2; s++) {
                            ap = (Int32*)range.Item3 + range.Item1 + s * arrStepInc;
                            bp = (Int32*)range.Item4;
                            cp = (Int32*)range.Item5 + range.Item1 + s * arrStepInc;
                            for (int l = 0; l < dimLen; l++) {
                                *cp = applyFunc(*ap, *bp);
                                ap += arrInc;
                                bp++;
                                cp += arrInc;
                            }
                        }
                        break;
                    case BinOptItExMode.AVI:
                        for (int s = 0; s < range.Item2; s++) {
                            bp = (Int32*)range.Item4;
                            cp = (Int32*)range.Item5 + range.Item1 + s * arrStepInc;
                            for (int l = 0; l < dimLen; l++) {
                                *cp = applyFunc(*cp, *bp);
                                bp++;
                                cp += arrInc;
                            }
                        }
                        break;
                }
                System.Threading.Interlocked.Decrement(ref workerCount);
            };
            #endregion
            #region work distribution
            int i = 0, workItemCount = Settings.s_maxNumberThreads, workItemLength;
            int outLen = outDims.NumberOfElements;
            if (Settings.s_maxNumberThreads > 1 && outLen / 2 >= Settings.s_minParallelElement1Count) {
                if (outLen / workItemCount > Settings.s_minParallelElement1Count) {
                    workItemLength = outLen / dimLen / workItemCount;
                    //workItemLength = (int)((double)outLen / workItemCount * 1.05);
                } else {
                    workItemLength = outLen / dimLen / 2;
                    workItemCount = 2;
                }
            } else {
                workItemLength = outLen / dimLen;
                workItemCount = 1;
            }
            fixed (Int32* arrAP = arrA)
            fixed (Int32* arrBP = arrB)
            fixed (Int32* retArrP = retArr) {
                for (; i < workItemCount - 1; i++) {
                    Tuple<int, int, IntPtr, IntPtr, IntPtr> range = new Tuple<int, int, IntPtr, IntPtr, IntPtr>(
                        i * workItemLength * arrStepInc, workItemLength, (IntPtr)arrAP, (IntPtr)arrBP, (IntPtr)retArrP);
                    System.Threading.Interlocked.Increment(ref workerCount);
                    ILThreadPool.QueueUserWorkItem(i, worker, range);
                }
                // the last (or maybe the only) chunk is done right here
                //System.Threading.Interlocked.Increment(ref retStorage.PendingTasks);
                worker(new Tuple<int, int, IntPtr, IntPtr, IntPtr>(
                    i * workItemLength * arrStepInc, (outLen / dimLen) - i * workItemLength, (IntPtr)arrAP, (IntPtr)arrBP, (IntPtr)retArrP));
                ILThreadPool.Wait4Workers(ref workerCount);
            }
            #endregion
            return new ILRetArray<Int32>(retStorage);
        }

        /// Apply an arbitrary function to two arrays
        /// A function c = f(a,b), which will be applied to elements in A and B
        /// Input array A
        /// Input array B
        /// The combination of A and B. The result and size depend on the inputs:
        /// size(A) == size(B)
        /// Same size as A/B, elementwise combination of A and B.
        /// isscalar(A) || isscalar(B)
        /// Same size as A or B, whichever is not a scalar, the scalar value being applied to each element
        /// (i.e. if the non-scalar input is empty, the result is empty).
        /// All other cases
        /// If A or B is a column vector and the other parameter is an array with a matching column length, the vector is used to operate on all columns of the array.
        /// Similarly, if one parameter is a row vector, it is used to operate along the rows of the other array if its number of columns matches the vector length.
        ///
        /// The apply function is also implemented for inputs of e.g. sizes (mxn) and (mx1).
        /// In this case the vector argument will be combined with each column, resulting in an (mxn) array.
        /// This feature is, however, officially not supported.
        public unsafe static ILRetArray<byte> apply(Func<byte, byte, byte> func, ILInArray<byte> A, ILInArray<byte> B) {
            using (ILScope.Enter(A, B)) {
                int outLen;
                BinOpItMode mode;
                byte[] retArr;
                byte[] arrA = A.GetArrayForRead();
                byte[] arrB = B.GetArrayForRead();
                ILSize outDims;
                #region determine operation mode
                if (A.IsScalar) {
                    outDims = B.Size;
                    if (B.IsScalar) {
                        return new ILRetArray<byte>(new byte[1] { func(A.GetValue(0), B.GetValue(0)) }, A.Size);
                    } else if (B.IsEmpty) {
                        return ILRetArray<byte>.empty(outDims);
                    } else {
                        outLen = outDims.NumberOfElements;
                        if (!B.TryGetStorage4InplaceOp(out retArr)) {
                            retArr = ILMemoryPool.Pool.New<byte>(outLen);
                            mode = BinOpItMode.SAN;
                        } else {
                            mode = BinOpItMode.SAI;
                        }
                    }
                } else {
                    outDims = A.Size;
                    if (B.IsScalar) {
                        if (A.IsEmpty) {
                            return ILRetArray<byte>.empty(A.Size);
                        }
                        outLen = A.S.NumberOfElements;
                        if (!A.TryGetStorage4InplaceOp(out retArr)) {
                            retArr = ILMemoryPool.Pool.New<byte>(outLen);
                            mode = BinOpItMode.ASN;
                        } else {
                            mode = BinOpItMode.ASI;
                        }
                    } else {
                        // array + array
                        if (!A.Size.IsSameSize(B.Size)) {
                            return applyEx(func, A, B);
                        }
                        outLen = A.S.NumberOfElements;
                        if (A.TryGetStorage4InplaceOp(out retArr))
                            mode = BinOpItMode.AAIA;
                        else if (B.TryGetStorage4InplaceOp(out retArr)) {
                            mode = BinOpItMode.AAIB;
                        } else {
                            retArr = ILMemoryPool.Pool.New<byte>(outLen);
                            mode = BinOpItMode.AAN;
                        }
                    }
                }
                #endregion
                ILDenseStorage<byte> retStorage = new ILDenseStorage<byte>(retArr, outDims);
                int i = 0, workerCount = 1;
                Action<object> worker = data => {
                    Tuple<int, int, IntPtr, IntPtr, IntPtr, BinOpItMode> range = (Tuple<int, int, IntPtr, IntPtr, IntPtr, BinOpItMode>)data;
                    byte* cLast, cp = (byte*)range.Item5 + range.Item1;
                    byte scalar;
                    cLast = cp + range.Item2;
                    #region loops
                    switch (mode) {
                        case BinOpItMode.AAIA:
                            byte* bp = ((byte*)range.Item4 + range.Item1);
                            while (cp < cLast) {
                                *cp = func(*cp, *bp++);
                                cp++;
                            }
                            break;
                        case BinOpItMode.AAIB:
                            byte* ap = ((byte*)range.Item3 + range.Item1);
                            while (cp < cLast) {
                                *cp = func(*ap++, *cp);
                                cp++;
                            }
                            //ap = ((double*)range.Item3 + range.Item1);
                            //for (int i2 = range.Item2; i2-- > 0; ) {
                            //    *(cp + i2) = *(ap + i2) - *(cp + i2);
                            //}
                            //int ie = range.Item1 + range.Item2 - 1;
                            //double[] locRetArr = retArr;
                            //for (int i2 = range.Item1; i2 < locRetArr.Length; i2++) {
                            //    locRetArr[i2] = arrA[i2] - locRetArr[i2];
                            //    if (i2 >= ie) break;
                            //}
                            break;
                        case BinOpItMode.AAN:
                            ap = ((byte*)range.Item3 + range.Item1);
                            bp = ((byte*)range.Item4 + range.Item1);
                            while (cp < cLast) {
                                *cp++ = func(*ap++, *bp++);
                            }
                            break;
                        case BinOpItMode.ASI:
                            scalar = *((byte*)range.Item4);
                            while (cp < cLast) {
                                *cp = func(*cp, scalar);
                                cp++;
                            }
                            break;
                        case BinOpItMode.ASN:
                            ap = ((byte*)range.Item3 + range.Item1);
                            scalar = *((byte*)range.Item4);
                            while (cp < cLast) {
                                *cp++ = func(*ap++, scalar);
                            }
                            break;
                        case BinOpItMode.SAI:
                            scalar = *((byte*)range.Item3);
                            while (cp < cLast) {
                                *cp = func(scalar, *cp);
                                cp++;
                            }
                            break;
                        case BinOpItMode.SAN:
                            scalar = *((byte*)range.Item3);
                            bp = ((byte*)range.Item4 + range.Item1);
                            while (cp < cLast) {
                                *cp++ = func(scalar, *bp++);
                            }
                            break;
                        default:
                            break;
                    }
                    #endregion
                    System.Threading.Interlocked.Decrement(ref workerCount);
                    //retStorage.PendingEvents.Signal();
                };
                #region do the work
                int workItemCount = Settings.s_maxNumberThreads, workItemLength;
                if (Settings.s_maxNumberThreads > 1 && outLen / 2 > Settings.s_minParallelElement1Count) {
                    if (outLen / workItemCount > Settings.s_minParallelElement1Count) {
                        workItemLength = outLen / workItemCount;
                        //workItemLength = (int)((double)outLen / workItemCount * 1.05);
                    } else {
                        workItemLength = outLen / 2;
                        workItemCount = 2;
                    }
                } else {
                    workItemLength = outLen;
                    workItemCount = 1;
                }
                // retStorage.PendingEvents = new System.Threading.CountdownEvent(workItemCount);
                fixed (byte* arrAP = arrA)
                fixed (byte* arrBP = arrB)
                fixed (byte* retArrP = retArr) {
                    for (; i < workItemCount - 1; i++) {
                        Tuple<int, int, IntPtr, IntPtr, IntPtr, BinOpItMode> range = new Tuple<int, int, IntPtr, IntPtr, IntPtr, BinOpItMode>(
                            i * workItemLength, workItemLength, (IntPtr)arrAP, (IntPtr)arrBP, (IntPtr)retArrP, mode);
                        System.Threading.Interlocked.Increment(ref workerCount);
                        ILThreadPool.QueueUserWorkItem(i, worker, range);
                    }
                    // the last (or maybe the only) chunk is done right here
                    //System.Threading.Interlocked.Increment(ref retStorage.PendingTasks);
                    worker(new Tuple<int, int, IntPtr, IntPtr, IntPtr, BinOpItMode>(
                        i * workItemLength, outLen - i * workItemLength, (IntPtr)arrAP, (IntPtr)arrBP, (IntPtr)retArrP, mode));
                    System.Threading.SpinWait.SpinUntil(() => { return workerCount <= 0; });
                    //while (workerCount > 0) ;
                }
                #endregion
                return new ILRetArray<byte>(retStorage);
            }
        }

        private static unsafe ILRetArray<byte> applyEx(Func<byte, byte, byte> applyFunc, ILInArray<byte> A, ILInArray<byte> B) {
            #region parameter checking
            if (isnull(A) || isnull(B))
                return empty<byte>(ILSize.Empty00);
            if (A.IsEmpty) {
                return empty<byte>(B.S);
            } else if (B.IsEmpty) {
                return empty<byte>(A.S);
            }
            //if (A.IsScalar || B.IsScalar || A.D.IsSameSize(B.D))
            //    return add(A,B);
            int dim = -1;
            for (int _L = 0; _L < Math.Max(A.S.NumberOfDimensions, B.S.NumberOfDimensions); _L++) {
                if (A.S[_L] != B.S[_L]) {
                    if (dim >= 0 || (A.S[_L] != 1 && B.S[_L] != 1)) {
                        throw new ILArgumentException("A and B must have the same size except for one singleton dimension in A or B");
                    }
                    dim = _L;
                }
            }
            if (dim > 1)
                throw new ILArgumentException("singleton dimension expansion currently is only supported for column- and row vectors");
            #endregion
            #region parameter preparation
            byte[] retArr;
            byte[] arrA = A.GetArrayForRead();
            byte[] arrB = B.GetArrayForRead();
            ILSize outDims;
            BinOptItExMode mode;
            int arrInc = 0;
            int arrStepInc = 0;
            int dimLen = 0;
            if (A.IsVector) {
                outDims = B.S;
                if (!B.TryGetStorage4InplaceOp(out retArr)) {
                    retArr = ILMemoryPool.Pool.New<byte>(outDims.NumberOfElements);
                    mode = BinOptItExMode.VAN;
                } else {
                    mode = BinOptItExMode.VAI;
                }
                dimLen = A.Length;
            } else if (B.IsVector) {
                outDims = A.S;
                if (!A.TryGetStorage4InplaceOp(out retArr)) {
                    retArr = ILMemoryPool.Pool.New<byte>(outDims.NumberOfElements);
                    mode = BinOptItExMode.AVN;
                } else {
                    mode = BinOptItExMode.AVI;
                }
                dimLen = B.Length;
            } else {
                throw new ILArgumentException("A and B must have the same size except for one singleton dimension in A or B");
            }
            arrInc = (dim == 0) ? outDims.SequentialIndexDistance(1) : outDims.SequentialIndexDistance(0);
            arrStepInc = outDims.SequentialIndexDistance(dim);
            #endregion
            #region worker loops definition
            ILDenseStorage<byte> retStorage = new ILDenseStorage<byte>(retArr, outDims);
            int workerCount = 1;
            Action<object> worker = data => {
                // expects: iStart, iLen, ap, bp, cp
                Tuple<int, int, IntPtr, IntPtr, IntPtr> range = (Tuple<int, int, IntPtr, IntPtr, IntPtr>)data;
                byte* ap;
                byte* bp;
                byte* cp;
                switch (mode) {
                    case BinOptItExMode.VAN:
                        for (int s = 0; s < range.Item2; s++) {
                            ap = (byte*)range.Item3;
                            bp = (byte*)range.Item4 + range.Item1 + s * arrStepInc;
                            cp = (byte*)range.Item5 + range.Item1 + s * arrStepInc;
                            for (int l = 0; l < dimLen; l++) {
                                *cp = applyFunc(*ap, *bp);
                                ap++;
                                bp += arrInc;
                                cp += arrInc;
                            }
                        }
                        break;
                    case BinOptItExMode.VAI:
                        for (int s = 0; s < range.Item2; s++) {
                            ap = (byte*)range.Item3;
                            cp = (byte*)range.Item5 + range.Item1 + s * arrStepInc;
                            for (int l = 0; l < dimLen; l++) {
                                *cp = applyFunc(*ap, *cp);
                                ap++;
                                cp += arrInc;
                            }
                        }
                        break;
                    case BinOptItExMode.AVN:
                        for (int s = 0; s < range.Item2; s++) {
                            ap = (byte*)range.Item3 + range.Item1 + s * arrStepInc;
                            bp = (byte*)range.Item4;
                            cp = (byte*)range.Item5 + range.Item1 + s * arrStepInc;
                            for (int l = 0; l < dimLen; l++) {
                                *cp = applyFunc(*ap, *bp);
                                ap += arrInc;
                                bp++;
                                cp += arrInc;
                            }
                        }
                        break;
                    case BinOptItExMode.AVI:
                        for (int s = 0; s < range.Item2; s++) {
                            bp = (byte*)range.Item4;
                            cp = (byte*)range.Item5 + range.Item1 + s * arrStepInc;
                            for (int l = 0; l < dimLen; l++) {
                                *cp = applyFunc(*cp, *bp);
                                bp++;
                                cp += arrInc;
                            }
                        }
                        break;
                }
                System.Threading.Interlocked.Decrement(ref workerCount);
            };
            #endregion
            #region work distribution
            int i = 0, workItemCount = Settings.s_maxNumberThreads, workItemLength;
            int outLen = outDims.NumberOfElements;
            if (Settings.s_maxNumberThreads > 1 && outLen / 2 >= Settings.s_minParallelElement1Count) {
                if (outLen / workItemCount > Settings.s_minParallelElement1Count) {
                    workItemLength = outLen / dimLen / workItemCount;
                    //workItemLength = (int)((double)outLen / workItemCount * 1.05);
                } else {
                    workItemLength = outLen / dimLen / 2;
                    workItemCount = 2;
                }
            } else {
                workItemLength = outLen / dimLen;
                workItemCount = 1;
            }
            fixed (byte* arrAP = arrA)
            fixed (byte* arrBP = arrB)
            fixed (byte* retArrP = retArr) {
                for (; i < workItemCount - 1; i++) {
                    Tuple<int, int, IntPtr, IntPtr, IntPtr> range = new Tuple<int, int, IntPtr, IntPtr, IntPtr>(
                        i * workItemLength * arrStepInc, workItemLength, (IntPtr)arrAP, (IntPtr)arrBP, (IntPtr)retArrP);
                    System.Threading.Interlocked.Increment(ref workerCount);
                    ILThreadPool.QueueUserWorkItem(i, worker, range);
                }
                // the last (or maybe the only) chunk is done right here
                //System.Threading.Interlocked.Increment(ref retStorage.PendingTasks);
                worker(new Tuple<int, int, IntPtr, IntPtr, IntPtr>(
                    i * workItemLength * arrStepInc, (outLen / dimLen) - i * workItemLength, (IntPtr)arrAP, (IntPtr)arrBP, (IntPtr)retArrP));
                ILThreadPool.Wait4Workers(ref workerCount);
            }
            #endregion
            return new ILRetArray<byte>(retStorage);
        }

        #endregion HYCALPER AUTO GENERATED CODE
    }
}
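
// -----------------------------------------------------------------------------------------
// Usage sketch (illustration only, not part of the generated sources above): one way the
// apply() overloads defined in this file might be called. The lambda and the literal input
// arrays are hypothetical, and the implicit conversion from Int32[] to ILInArray<Int32> is
// assumed to produce a column vector, as it does for the other ILNumerics element types.
//
//   using (ILScope.Enter()) {
//       // elementwise combination: both inputs are 4x1 column vectors of equal size
//       ILArray<Int32> C = ILMath.apply((Int32 a, Int32 b) => a * 10 + b,
//                                       new Int32[] { 1, 2, 3, 4 },
//                                       new Int32[] { 5, 6, 7, 8 });
//       // C is [15; 26; 37; 48]; scalar inputs and matching column-/row-vector inputs
//       // broadcast as described in the comments of apply() above.
//   }
// -----------------------------------------------------------------------------------------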