Free cookie consent management tool by TermsFeed Policy Generator

source: branches/HeuristicLab.Problems.GaussianProcessTuning/HeuristicLab.Eigen/Eigen/src/Core/products/SelfadjointMatrixVector.h @ 9562

Last change on this file since 9562 was 9562, checked in by gkronber, 11 years ago

#1967 worked on Gaussian process evolution.

File size: 10.2 KB
Line 
1// This file is part of Eigen, a lightweight C++ template library
2// for linear algebra.
3//
4// Copyright (C) 2008-2009 Gael Guennebaud <gael.guennebaud@inria.fr>
5//
6// This Source Code Form is subject to the terms of the Mozilla
7// Public License v. 2.0. If a copy of the MPL was not distributed
8// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
9
10#ifndef EIGEN_SELFADJOINT_MATRIX_VECTOR_H
11#define EIGEN_SELFADJOINT_MATRIX_VECTOR_H
12
13namespace Eigen {
14
15namespace internal {
16
17/* Optimized selfadjoint matrix * vector product:
18 * This algorithm processes 2 columns at onces that allows to both reduce
19 * the number of load/stores of the result by a factor 2 and to reduce
20 * the instruction dependency.
21 */
22
23template<typename Scalar, typename Index, int StorageOrder, int UpLo, bool ConjugateLhs, bool ConjugateRhs, int Version=Specialized>
24struct selfadjoint_matrix_vector_product;
25
26template<typename Scalar, typename Index, int StorageOrder, int UpLo, bool ConjugateLhs, bool ConjugateRhs, int Version>
27struct selfadjoint_matrix_vector_product
28
29{
30static EIGEN_DONT_INLINE void run(
31  Index size,
32  const Scalar*  lhs, Index lhsStride,
33  const Scalar* _rhs, Index rhsIncr,
34  Scalar* res,
35  Scalar alpha)
36{
37  typedef typename packet_traits<Scalar>::type Packet;
38  typedef typename NumTraits<Scalar>::Real RealScalar;
39  const Index PacketSize = sizeof(Packet)/sizeof(Scalar);
40
41  enum {
42    IsRowMajor = StorageOrder==RowMajor ? 1 : 0,
43    IsLower = UpLo == Lower ? 1 : 0,
44    FirstTriangular = IsRowMajor == IsLower
45  };
46
47  conj_helper<Scalar,Scalar,NumTraits<Scalar>::IsComplex && EIGEN_LOGICAL_XOR(ConjugateLhs,  IsRowMajor), ConjugateRhs> cj0;
48  conj_helper<Scalar,Scalar,NumTraits<Scalar>::IsComplex && EIGEN_LOGICAL_XOR(ConjugateLhs, !IsRowMajor), ConjugateRhs> cj1;
49  conj_helper<Scalar,Scalar,NumTraits<Scalar>::IsComplex, ConjugateRhs> cjd;
50
51  conj_helper<Packet,Packet,NumTraits<Scalar>::IsComplex && EIGEN_LOGICAL_XOR(ConjugateLhs,  IsRowMajor), ConjugateRhs> pcj0;
52  conj_helper<Packet,Packet,NumTraits<Scalar>::IsComplex && EIGEN_LOGICAL_XOR(ConjugateLhs, !IsRowMajor), ConjugateRhs> pcj1;
53
54  Scalar cjAlpha = ConjugateRhs ? conj(alpha) : alpha;
55
56  // FIXME this copy is now handled outside product_selfadjoint_vector, so it could probably be removed.
57  // if the rhs is not sequentially stored in memory we copy it to a temporary buffer,
58  // this is because we need to extract packets
59  ei_declare_aligned_stack_constructed_variable(Scalar,rhs,size,rhsIncr==1 ? const_cast<Scalar*>(_rhs) : 0); 
60  if (rhsIncr!=1)
61  {
62    const Scalar* it = _rhs;
63    for (Index i=0; i<size; ++i, it+=rhsIncr)
64      rhs[i] = *it;
65  }
66
67  Index bound = (std::max)(Index(0),size-8) & 0xfffffffe;
68  if (FirstTriangular)
69    bound = size - bound;
70
71  for (Index j=FirstTriangular ? bound : 0;
72       j<(FirstTriangular ? size : bound);j+=2)
73  {
74    register const Scalar* EIGEN_RESTRICT A0 = lhs + j*lhsStride;
75    register const Scalar* EIGEN_RESTRICT A1 = lhs + (j+1)*lhsStride;
76
77    Scalar t0 = cjAlpha * rhs[j];
78    Packet ptmp0 = pset1<Packet>(t0);
79    Scalar t1 = cjAlpha * rhs[j+1];
80    Packet ptmp1 = pset1<Packet>(t1);
81
82    Scalar t2(0);
83    Packet ptmp2 = pset1<Packet>(t2);
84    Scalar t3(0);
85    Packet ptmp3 = pset1<Packet>(t3);
86
87    size_t starti = FirstTriangular ? 0 : j+2;
88    size_t endi   = FirstTriangular ? j : size;
89    size_t alignedStart = (starti) + internal::first_aligned(&res[starti], endi-starti);
90    size_t alignedEnd = alignedStart + ((endi-alignedStart)/(PacketSize))*(PacketSize);
91
92    // TODO make sure this product is a real * complex and that the rhs is properly conjugated if needed
93    res[j]   += cjd.pmul(internal::real(A0[j]), t0);
94    res[j+1] += cjd.pmul(internal::real(A1[j+1]), t1);
95    if(FirstTriangular)
96    {
97      res[j]   += cj0.pmul(A1[j],   t1);
98      t3       += cj1.pmul(A1[j],   rhs[j]);
99    }
100    else
101    {
102      res[j+1] += cj0.pmul(A0[j+1],t0);
103      t2 += cj1.pmul(A0[j+1], rhs[j+1]);
104    }
105
106    for (size_t i=starti; i<alignedStart; ++i)
107    {
108      res[i] += t0 * A0[i] + t1 * A1[i];
109      t2 += conj(A0[i]) * rhs[i];
110      t3 += conj(A1[i]) * rhs[i];
111    }
112    // Yes this an optimization for gcc 4.3 and 4.4 (=> huge speed up)
113    // gcc 4.2 does this optimization automatically.
114    const Scalar* EIGEN_RESTRICT a0It  = A0  + alignedStart;
115    const Scalar* EIGEN_RESTRICT a1It  = A1  + alignedStart;
116    const Scalar* EIGEN_RESTRICT rhsIt = rhs + alignedStart;
117          Scalar* EIGEN_RESTRICT resIt = res + alignedStart;
118    for (size_t i=alignedStart; i<alignedEnd; i+=PacketSize)
119    {
120      Packet A0i = ploadu<Packet>(a0It);  a0It  += PacketSize;
121      Packet A1i = ploadu<Packet>(a1It);  a1It  += PacketSize;
122      Packet Bi  = ploadu<Packet>(rhsIt); rhsIt += PacketSize; // FIXME should be aligned in most cases
123      Packet Xi  = pload <Packet>(resIt);
124
125      Xi    = pcj0.pmadd(A0i,ptmp0, pcj0.pmadd(A1i,ptmp1,Xi));
126      ptmp2 = pcj1.pmadd(A0i,  Bi, ptmp2);
127      ptmp3 = pcj1.pmadd(A1i,  Bi, ptmp3);
128      pstore(resIt,Xi); resIt += PacketSize;
129    }
130    for (size_t i=alignedEnd; i<endi; i++)
131    {
132      res[i] += cj0.pmul(A0[i], t0) + cj0.pmul(A1[i],t1);
133      t2 += cj1.pmul(A0[i], rhs[i]);
134      t3 += cj1.pmul(A1[i], rhs[i]);
135    }
136
137    res[j]   += alpha * (t2 + predux(ptmp2));
138    res[j+1] += alpha * (t3 + predux(ptmp3));
139  }
140  for (Index j=FirstTriangular ? 0 : bound;j<(FirstTriangular ? bound : size);j++)
141  {
142    register const Scalar* EIGEN_RESTRICT A0 = lhs + j*lhsStride;
143
144    Scalar t1 = cjAlpha * rhs[j];
145    Scalar t2(0);
146    // TODO make sure this product is a real * complex and that the rhs is properly conjugated if needed
147    res[j] += cjd.pmul(internal::real(A0[j]), t1);
148    for (Index i=FirstTriangular ? 0 : j+1; i<(FirstTriangular ? j : size); i++)
149    {
150      res[i] += cj0.pmul(A0[i], t1);
151      t2 += cj1.pmul(A0[i], rhs[i]);
152    }
153    res[j] += alpha * t2;
154  }
155}
156};
157
158} // end namespace internal
159
160/***************************************************************************
161* Wrapper to product_selfadjoint_vector
162***************************************************************************/
163
164namespace internal {
165template<typename Lhs, int LhsMode, typename Rhs>
166struct traits<SelfadjointProductMatrix<Lhs,LhsMode,false,Rhs,0,true> >
167  : traits<ProductBase<SelfadjointProductMatrix<Lhs,LhsMode,false,Rhs,0,true>, Lhs, Rhs> >
168{};
169}
170
171template<typename Lhs, int LhsMode, typename Rhs>
172struct SelfadjointProductMatrix<Lhs,LhsMode,false,Rhs,0,true>
173  : public ProductBase<SelfadjointProductMatrix<Lhs,LhsMode,false,Rhs,0,true>, Lhs, Rhs >
174{
175  EIGEN_PRODUCT_PUBLIC_INTERFACE(SelfadjointProductMatrix)
176
177  enum {
178    LhsUpLo = LhsMode&(Upper|Lower)
179  };
180
181  SelfadjointProductMatrix(const Lhs& lhs, const Rhs& rhs) : Base(lhs,rhs) {}
182
183  template<typename Dest> void scaleAndAddTo(Dest& dest, Scalar alpha) const
184  {
185    typedef typename Dest::Scalar ResScalar;
186    typedef typename Base::RhsScalar RhsScalar;
187    typedef Map<Matrix<ResScalar,Dynamic,1>, Aligned> MappedDest;
188   
189    eigen_assert(dest.rows()==m_lhs.rows() && dest.cols()==m_rhs.cols());
190
191    typename internal::add_const_on_value_type<ActualLhsType>::type lhs = LhsBlasTraits::extract(m_lhs);
192    typename internal::add_const_on_value_type<ActualRhsType>::type rhs = RhsBlasTraits::extract(m_rhs);
193
194    Scalar actualAlpha = alpha * LhsBlasTraits::extractScalarFactor(m_lhs)
195                               * RhsBlasTraits::extractScalarFactor(m_rhs);
196
197    enum {
198      EvalToDest = (Dest::InnerStrideAtCompileTime==1),
199      UseRhs = (_ActualRhsType::InnerStrideAtCompileTime==1)
200    };
201   
202    internal::gemv_static_vector_if<ResScalar,Dest::SizeAtCompileTime,Dest::MaxSizeAtCompileTime,!EvalToDest> static_dest;
203    internal::gemv_static_vector_if<RhsScalar,_ActualRhsType::SizeAtCompileTime,_ActualRhsType::MaxSizeAtCompileTime,!UseRhs> static_rhs;
204
205    ei_declare_aligned_stack_constructed_variable(ResScalar,actualDestPtr,dest.size(),
206                                                  EvalToDest ? dest.data() : static_dest.data());
207                                                 
208    ei_declare_aligned_stack_constructed_variable(RhsScalar,actualRhsPtr,rhs.size(),
209        UseRhs ? const_cast<RhsScalar*>(rhs.data()) : static_rhs.data());
210   
211    if(!EvalToDest)
212    {
213      #ifdef EIGEN_DENSE_STORAGE_CTOR_PLUGIN
214      int size = dest.size();
215      EIGEN_DENSE_STORAGE_CTOR_PLUGIN
216      #endif
217      MappedDest(actualDestPtr, dest.size()) = dest;
218    }
219     
220    if(!UseRhs)
221    {
222      #ifdef EIGEN_DENSE_STORAGE_CTOR_PLUGIN
223      int size = rhs.size();
224      EIGEN_DENSE_STORAGE_CTOR_PLUGIN
225      #endif
226      Map<typename _ActualRhsType::PlainObject>(actualRhsPtr, rhs.size()) = rhs;
227    }
228     
229     
230    internal::selfadjoint_matrix_vector_product<Scalar, Index, (internal::traits<_ActualLhsType>::Flags&RowMajorBit) ? RowMajor : ColMajor, int(LhsUpLo), bool(LhsBlasTraits::NeedToConjugate), bool(RhsBlasTraits::NeedToConjugate)>::run
231      (
232        lhs.rows(),                             // size
233        &lhs.coeffRef(0,0),  lhs.outerStride(), // lhs info
234        actualRhsPtr, 1,                        // rhs info
235        actualDestPtr,                          // result info
236        actualAlpha                             // scale factor
237      );
238   
239    if(!EvalToDest)
240      dest = MappedDest(actualDestPtr, dest.size());
241  }
242};
243
244namespace internal {
245template<typename Lhs, typename Rhs, int RhsMode>
246struct traits<SelfadjointProductMatrix<Lhs,0,true,Rhs,RhsMode,false> >
247  : traits<ProductBase<SelfadjointProductMatrix<Lhs,0,true,Rhs,RhsMode,false>, Lhs, Rhs> >
248{};
249}
250
251template<typename Lhs, typename Rhs, int RhsMode>
252struct SelfadjointProductMatrix<Lhs,0,true,Rhs,RhsMode,false>
253  : public ProductBase<SelfadjointProductMatrix<Lhs,0,true,Rhs,RhsMode,false>, Lhs, Rhs >
254{
255  EIGEN_PRODUCT_PUBLIC_INTERFACE(SelfadjointProductMatrix)
256
257  enum {
258    RhsUpLo = RhsMode&(Upper|Lower)
259  };
260
261  SelfadjointProductMatrix(const Lhs& lhs, const Rhs& rhs) : Base(lhs,rhs) {}
262
263  template<typename Dest> void scaleAndAddTo(Dest& dest, Scalar alpha) const
264  {
265    // let's simply transpose the product
266    Transpose<Dest> destT(dest);
267    SelfadjointProductMatrix<Transpose<const Rhs>, int(RhsUpLo)==Upper ? Lower : Upper, false,
268                             Transpose<const Lhs>, 0, true>(m_rhs.transpose(), m_lhs.transpose()).scaleAndAddTo(destT, alpha);
269  }
270};
271
272} // end namespace Eigen
273
274#endif // EIGEN_SELFADJOINT_MATRIX_VECTOR_H
Note: See TracBrowser for help on using the repository browser.