Context Navigation

source: branches/HeuristicLab.Problems.GaussianProcessTuning/HeuristicLab.Eigen/Eigen/src/Core/Redux.h @ 9779

Visit:

Last change on this file since 9779 was 9562, checked in by gkronber, 12 years ago
#1967 worked on Gaussian process evolution.
File size: 13.6 KB

Line
1	// This file is part of Eigen, a lightweight C++ template library
2	// for linear algebra.
3	//
4	// Copyright (C) 2008 Gael Guennebaud <gael.guennebaud@inria.fr>
5	// Copyright (C) 2006-2008 Benoit Jacob <jacob.benoit.1@gmail.com>
6	//
7	// This Source Code Form is subject to the terms of the Mozilla
8	// Public License v. 2.0. If a copy of the MPL was not distributed
9	// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
10
11	#ifndef EIGEN_REDUX_H
12	#define EIGEN_REDUX_H
13
14	namespace Eigen {
15
16	namespace internal {
17
18	// TODO
19	// * implement other kind of vectorization
20	// * factorize code
21
22	/***************************************************************************
23	* Part 1 : the logic deciding a strategy for vectorization and unrolling
24	***************************************************************************/
25
26	template<typename Func, typename Derived>
27	struct redux_traits
28	{
29	public:
30	enum {
31	PacketSize = packet_traits<typename Derived::Scalar>::size,
32	InnerMaxSize = int(Derived::IsRowMajor)
33	? Derived::MaxColsAtCompileTime
34	: Derived::MaxRowsAtCompileTime
35	};
36
37	enum {
38	MightVectorize = (int(Derived::Flags)&ActualPacketAccessBit)
39	&& (functor_traits<Func>::PacketAccess),
40	MayLinearVectorize = MightVectorize && (int(Derived::Flags)&LinearAccessBit),
41	MaySliceVectorize = MightVectorize && int(InnerMaxSize)>=3*PacketSize
42	};
43
44	public:
45	enum {
46	Traversal = int(MayLinearVectorize) ? int(LinearVectorizedTraversal)
47	: int(MaySliceVectorize) ? int(SliceVectorizedTraversal)
48	: int(DefaultTraversal)
49	};
50
51	public:
52	enum {
53	Cost = ( Derived::SizeAtCompileTime == Dynamic
54	\|\| Derived::CoeffReadCost == Dynamic
55	\|\| (Derived::SizeAtCompileTime!=1 && functor_traits<Func>::Cost == Dynamic)
56	) ? Dynamic
57	: Derived::SizeAtCompileTime * Derived::CoeffReadCost
58	+ (Derived::SizeAtCompileTime-1) * functor_traits<Func>::Cost,
59	UnrollingLimit = EIGEN_UNROLLING_LIMIT * (int(Traversal) == int(DefaultTraversal) ? 1 : int(PacketSize))
60	};
61
62	public:
63	enum {
64	Unrolling = Cost != Dynamic && Cost <= UnrollingLimit
65	? CompleteUnrolling
66	: NoUnrolling
67	};
68	};
69
70	/***************************************************************************
71	* Part 2 : unrollers
72	***************************************************************************/
73
74	/* no vectorization */
75
76	template<typename Func, typename Derived, int Start, int Length>
77	struct redux_novec_unroller
78	{
79	enum {
80	HalfLength = Length/2
81	};
82
83	typedef typename Derived::Scalar Scalar;
84
85	static EIGEN_STRONG_INLINE Scalar run(const Derived &mat, const Func& func)
86	{
87	return func(redux_novec_unroller<Func, Derived, Start, HalfLength>::run(mat,func),
88	redux_novec_unroller<Func, Derived, Start+HalfLength, Length-HalfLength>::run(mat,func));
89	}
90	};
91
92	template<typename Func, typename Derived, int Start>
93	struct redux_novec_unroller<Func, Derived, Start, 1>
94	{
95	enum {
96	outer = Start / Derived::InnerSizeAtCompileTime,
97	inner = Start % Derived::InnerSizeAtCompileTime
98	};
99
100	typedef typename Derived::Scalar Scalar;
101
102	static EIGEN_STRONG_INLINE Scalar run(const Derived &mat, const Func&)
103	{
104	return mat.coeffByOuterInner(outer, inner);
105	}
106	};
107
108	// This is actually dead code and will never be called. It is required
109	// to prevent false warnings regarding failed inlining though
110	// for 0 length run() will never be called at all.
111	template<typename Func, typename Derived, int Start>
112	struct redux_novec_unroller<Func, Derived, Start, 0>
113	{
114	typedef typename Derived::Scalar Scalar;
115	static EIGEN_STRONG_INLINE Scalar run(const Derived&, const Func&) { return Scalar(); }
116	};
117
118	/* vectorization */
119
120	template<typename Func, typename Derived, int Start, int Length>
121	struct redux_vec_unroller
122	{
123	enum {
124	PacketSize = packet_traits<typename Derived::Scalar>::size,
125	HalfLength = Length/2
126	};
127
128	typedef typename Derived::Scalar Scalar;
129	typedef typename packet_traits<Scalar>::type PacketScalar;
130
131	static EIGEN_STRONG_INLINE PacketScalar run(const Derived &mat, const Func& func)
132	{
133	return func.packetOp(
134	redux_vec_unroller<Func, Derived, Start, HalfLength>::run(mat,func),
135	redux_vec_unroller<Func, Derived, Start+HalfLength, Length-HalfLength>::run(mat,func) );
136	}
137	};
138
139	template<typename Func, typename Derived, int Start>
140	struct redux_vec_unroller<Func, Derived, Start, 1>
141	{
142	enum {
143	index = Start * packet_traits<typename Derived::Scalar>::size,
144	outer = index / int(Derived::InnerSizeAtCompileTime),
145	inner = index % int(Derived::InnerSizeAtCompileTime),
146	alignment = (Derived::Flags & AlignedBit) ? Aligned : Unaligned
147	};
148
149	typedef typename Derived::Scalar Scalar;
150	typedef typename packet_traits<Scalar>::type PacketScalar;
151
152	static EIGEN_STRONG_INLINE PacketScalar run(const Derived &mat, const Func&)
153	{
154	return mat.template packetByOuterInner<alignment>(outer, inner);
155	}
156	};
157
158	/***************************************************************************
159	* Part 3 : implementation of all cases
160	***************************************************************************/
161
162	template<typename Func, typename Derived,
163	int Traversal = redux_traits<Func, Derived>::Traversal,
164	int Unrolling = redux_traits<Func, Derived>::Unrolling
165	>
166	struct redux_impl;
167
168	template<typename Func, typename Derived>
169	struct redux_impl<Func, Derived, DefaultTraversal, NoUnrolling>
170	{
171	typedef typename Derived::Scalar Scalar;
172	typedef typename Derived::Index Index;
173	static EIGEN_STRONG_INLINE Scalar run(const Derived& mat, const Func& func)
174	{
175	eigen_assert(mat.rows()>0 && mat.cols()>0 && "you are using an empty matrix");
176	Scalar res;
177	res = mat.coeffByOuterInner(0, 0);
178	for(Index i = 1; i < mat.innerSize(); ++i)
179	res = func(res, mat.coeffByOuterInner(0, i));
180	for(Index i = 1; i < mat.outerSize(); ++i)
181	for(Index j = 0; j < mat.innerSize(); ++j)
182	res = func(res, mat.coeffByOuterInner(i, j));
183	return res;
184	}
185	};
186
187	template<typename Func, typename Derived>
188	struct redux_impl<Func,Derived, DefaultTraversal, CompleteUnrolling>
189	: public redux_novec_unroller<Func,Derived, 0, Derived::SizeAtCompileTime>
190	{};
191
192	template<typename Func, typename Derived>
193	struct redux_impl<Func, Derived, LinearVectorizedTraversal, NoUnrolling>
194	{
195	typedef typename Derived::Scalar Scalar;
196	typedef typename packet_traits<Scalar>::type PacketScalar;
197	typedef typename Derived::Index Index;
198
199	static Scalar run(const Derived& mat, const Func& func)
200	{
201	const Index size = mat.size();
202	eigen_assert(size && "you are using an empty matrix");
203	const Index packetSize = packet_traits<Scalar>::size;
204	const Index alignedStart = internal::first_aligned(mat);
205	enum {
206	alignment = bool(Derived::Flags & DirectAccessBit) \|\| bool(Derived::Flags & AlignedBit)
207	? Aligned : Unaligned
208	};
209	const Index alignedSize2 = ((size-alignedStart)/(2packetSize))(2*packetSize);
210	const Index alignedSize = ((size-alignedStart)/(packetSize))*(packetSize);
211	const Index alignedEnd2 = alignedStart + alignedSize2;
212	const Index alignedEnd = alignedStart + alignedSize;
213	Scalar res;
214	if(alignedSize)
215	{
216	PacketScalar packet_res0 = mat.template packet<alignment>(alignedStart);
217	if(alignedSize>packetSize) // we have at least two packets to partly unroll the loop
218	{
219	PacketScalar packet_res1 = mat.template packet<alignment>(alignedStart+packetSize);
220	for(Index index = alignedStart + 2packetSize; index < alignedEnd2; index += 2packetSize)
221	{
222	packet_res0 = func.packetOp(packet_res0, mat.template packet<alignment>(index));
223	packet_res1 = func.packetOp(packet_res1, mat.template packet<alignment>(index+packetSize));
224	}
225
226	packet_res0 = func.packetOp(packet_res0,packet_res1);
227	if(alignedEnd>alignedEnd2)
228	packet_res0 = func.packetOp(packet_res0, mat.template packet<alignment>(alignedEnd2));
229	}
230	res = func.predux(packet_res0);
231
232	for(Index index = 0; index < alignedStart; ++index)
233	res = func(res,mat.coeff(index));
234
235	for(Index index = alignedEnd; index < size; ++index)
236	res = func(res,mat.coeff(index));
237	}
238	else // too small to vectorize anything.
239	// since this is dynamic-size hence inefficient anyway for such small sizes, don't try to optimize.
240	{
241	res = mat.coeff(0);
242	for(Index index = 1; index < size; ++index)
243	res = func(res,mat.coeff(index));
244	}
245
246	return res;
247	}
248	};
249
250	template<typename Func, typename Derived>
251	struct redux_impl<Func, Derived, SliceVectorizedTraversal, NoUnrolling>
252	{
253	typedef typename Derived::Scalar Scalar;
254	typedef typename packet_traits<Scalar>::type PacketScalar;
255	typedef typename Derived::Index Index;
256
257	static Scalar run(const Derived& mat, const Func& func)
258	{
259	eigen_assert(mat.rows()>0 && mat.cols()>0 && "you are using an empty matrix");
260	const Index innerSize = mat.innerSize();
261	const Index outerSize = mat.outerSize();
262	enum {
263	packetSize = packet_traits<Scalar>::size
264	};
265	const Index packetedInnerSize = ((innerSize)/packetSize)*packetSize;
266	Scalar res;
267	if(packetedInnerSize)
268	{
269	PacketScalar packet_res = mat.template packet<Unaligned>(0,0);
270	for(Index j=0; j<outerSize; ++j)
271	for(Index i=(j==0?packetSize:0); i<packetedInnerSize; i+=Index(packetSize))
272	packet_res = func.packetOp(packet_res, mat.template packetByOuterInner<Unaligned>(j,i));
273
274	res = func.predux(packet_res);
275	for(Index j=0; j<outerSize; ++j)
276	for(Index i=packetedInnerSize; i<innerSize; ++i)
277	res = func(res, mat.coeffByOuterInner(j,i));
278	}
279	else // too small to vectorize anything.
280	// since this is dynamic-size hence inefficient anyway for such small sizes, don't try to optimize.
281	{
282	res = redux_impl<Func, Derived, DefaultTraversal, NoUnrolling>::run(mat, func);
283	}
284
285	return res;
286	}
287	};
288
289	template<typename Func, typename Derived>
290	struct redux_impl<Func, Derived, LinearVectorizedTraversal, CompleteUnrolling>
291	{
292	typedef typename Derived::Scalar Scalar;
293	typedef typename packet_traits<Scalar>::type PacketScalar;
294	enum {
295	PacketSize = packet_traits<Scalar>::size,
296	Size = Derived::SizeAtCompileTime,
297	VectorizedSize = (Size / PacketSize) * PacketSize
298	};
299	static EIGEN_STRONG_INLINE Scalar run(const Derived& mat, const Func& func)
300	{
301	eigen_assert(mat.rows()>0 && mat.cols()>0 && "you are using an empty matrix");
302	Scalar res = func.predux(redux_vec_unroller<Func, Derived, 0, Size / PacketSize>::run(mat,func));
303	if (VectorizedSize != Size)
304	res = func(res,redux_novec_unroller<Func, Derived, VectorizedSize, Size-VectorizedSize>::run(mat,func));
305	return res;
306	}
307	};
308
309	} // end namespace internal
310
311	/***************************************************************************
312	* Part 4 : public API
313	***************************************************************************/
314
315
316	/** \returns the result of a full redux operation on the whole matrix or vector using \a func
317	*
318	* The template parameter \a BinaryOp is the type of the functor \a func which must be
319	* an associative operator. Both current STL and TR1 functor styles are handled.
320	*
321	* \sa DenseBase::sum(), DenseBase::minCoeff(), DenseBase::maxCoeff(), MatrixBase::colwise(), MatrixBase::rowwise()
322	*/
323	template<typename Derived>
324	template<typename Func>
325	EIGEN_STRONG_INLINE typename internal::result_of<Func(typename internal::traits<Derived>::Scalar)>::type
326	DenseBase<Derived>::redux(const Func& func) const
327	{
328	typedef typename internal::remove_all<typename Derived::Nested>::type ThisNested;
329	return internal::redux_impl<Func, ThisNested>
330	::run(derived(), func);
331	}
332
333	/** \returns the minimum of all coefficients of *this
334	*/
335	template<typename Derived>
336	EIGEN_STRONG_INLINE typename internal::traits<Derived>::Scalar
337	DenseBase<Derived>::minCoeff() const
338	{
339	return this->redux(Eigen::internal::scalar_min_op<Scalar>());
340	}
341
342	/** \returns the maximum of all coefficients of *this
343	*/
344	template<typename Derived>
345	EIGEN_STRONG_INLINE typename internal::traits<Derived>::Scalar
346	DenseBase<Derived>::maxCoeff() const
347	{
348	return this->redux(Eigen::internal::scalar_max_op<Scalar>());
349	}
350
351	/** \returns the sum of all coefficients of *this
352	*
353	* \sa trace(), prod(), mean()
354	*/
355	template<typename Derived>
356	EIGEN_STRONG_INLINE typename internal::traits<Derived>::Scalar
357	DenseBase<Derived>::sum() const
358	{
359	if(SizeAtCompileTime==0 \|\| (SizeAtCompileTime==Dynamic && size()==0))
360	return Scalar(0);
361	return this->redux(Eigen::internal::scalar_sum_op<Scalar>());
362	}
363
364	/** \returns the mean of all coefficients of *this
365	*
366	* \sa trace(), prod(), sum()
367	*/
368	template<typename Derived>
369	EIGEN_STRONG_INLINE typename internal::traits<Derived>::Scalar
370	DenseBase<Derived>::mean() const
371	{
372	return Scalar(this->redux(Eigen::internal::scalar_sum_op<Scalar>())) / Scalar(this->size());
373	}
374
375	/** \returns the product of all coefficients of *this
376	*
377	* Example: \include MatrixBase_prod.cpp
378	* Output: \verbinclude MatrixBase_prod.out
379	*
380	* \sa sum(), mean(), trace()
381	*/
382	template<typename Derived>
383	EIGEN_STRONG_INLINE typename internal::traits<Derived>::Scalar
384	DenseBase<Derived>::prod() const
385	{
386	if(SizeAtCompileTime==0 \|\| (SizeAtCompileTime==Dynamic && size()==0))
387	return Scalar(1);
388	return this->redux(Eigen::internal::scalar_product_op<Scalar>());
389	}
390
391	/** \returns the trace of \c *this, i.e. the sum of the coefficients on the main diagonal.
392	*
393	* \c *this can be any matrix, not necessarily square.
394	*
395	* \sa diagonal(), sum()
396	*/
397	template<typename Derived>
398	EIGEN_STRONG_INLINE typename internal::traits<Derived>::Scalar
399	MatrixBase<Derived>::trace() const
400	{
401	return derived().diagonal().sum();
402	}
403
404	} // end namespace Eigen
405
406	#endif // EIGEN_REDUX_H

Note: See TracBrowser for help on using the repository browser.

Download in other formats:

Update cookies preferences