openrave.org

 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
mathextra.h
Go to the documentation of this file.
1 // -*- coding: utf-8 -*-
2 // Copyright (C) 2006-2011 Rosen Diankov <rosen.diankov@gmail.com>
3 //
4 // This file is part of OpenRAVE.
5 // OpenRAVE is free software: you can redistribute it and/or modify
6 // it under the terms of the GNU Lesser General Public License as published by
7 // the Free Software Foundation, either version 3 of the License, or
8 // at your option) any later version.
9 //
10 // This program is distributed in the hope that it will be useful,
11 // but WITHOUT ANY WARRANTY; without even the implied warranty of
12 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 // GNU Lesser General Public License for more details.
14 //
15 // You should have received a copy of the GNU Lesser General Public License
16 // along with this program. If not, see <http://www.gnu.org/licenses/>.
20 #ifndef OPENRAVE_MATHEXTRA_H
21 #define OPENRAVE_MATHEXTRA_H
22 
23 #ifndef OPENRAVE_API
24 #define OPENRAVE_API
25 #endif
26 
27 #ifdef BOOST_ASSERT
28 #define MATH_ASSERT BOOST_ASSERT
29 #else
30 #include <cassert>
31 #define MATH_ASSERT assert
32 #endif
33 
34 #include <cmath>
35 #include <climits>
36 
37 namespace OpenRAVE {
38 
40 namespace mathextra {
41 
42 #define distinctRoots 0 // roots r0 < r1 < r2
43 #define singleRoot 1 // root r0
44 #define floatRoot01 2 // roots r0 = r1 < r2
45 #define floatRoot12 4 // roots r0 < r1 = r2
46 #define tripleRoot 6 // roots r0 = r1 = r2
47 
48 // multiplies 4x4 matrices
49 inline float* mult4(float* pfres, const float* pf1, const float* pf2);
50 
51 // pf1^T * pf2
52 inline float* multtrans3(float* pfres, const float* pf1, const float* pf2);
53 inline float* multtrans4(float* pfres, const float* pf1, const float* pf2);
54 inline float* transnorm3(float* pfout, const float* pfmat, const float* pf);
55 
56 inline float* transpose3(const float* pf, float* pfres);
57 inline float* transpose4(const float* pf, float* pfres);
58 
59 inline float dot2(const float* pf1, const float* pf2);
60 inline float dot3(const float* pf1, const float* pf2);
61 inline float dot4(const float* pf1, const float* pf2);
62 
63 inline float lengthsqr2(const float* pf);
64 inline float lengthsqr3(const float* pf);
65 inline float lengthsqr4(const float* pf);
66 
67 inline float* normalize2(float* pfout, const float* pf);
68 inline float* normalize3(float* pfout, const float* pf);
69 inline float* normalize4(float* pfout, const float* pf);
70 
71 inline float* cross3(float* pfout, const float* pf1, const float* pf2);
72 
73 // multiplies 3x3 matrices
74 inline float* mult3_s4(float* pfres, const float* pf1, const float* pf2);
75 inline float* mult3_s3(float* pfres, const float* pf1, const float* pf2);
76 
77 inline float* inv3(const float* pf, float* pfres, float* pfdet, int stride);
78 inline float* inv4(const float* pf, float* pfres);
79 
80 
81 inline double* mult4(double* pfres, const double* pf1, const double* pf2);
82 
83 // pf1^T * pf2
84 inline double* multtrans3(double* pfres, const double* pf1, const double* pf2);
85 inline double* multtrans4(double* pfres, const double* pf1, const double* pf2);
86 inline double* transnorm3(double* pfout, const double* pfmat, const double* pf);
87 
88 inline double* transpose3(const double* pf, double* pfres);
89 inline double* transpose4(const double* pf, double* pfres);
90 
91 inline double dot2(const double* pf1, const double* pf2);
92 inline double dot3(const double* pf1, const double* pf2);
93 inline double dot4(const double* pf1, const double* pf2);
94 
95 inline double lengthsqr2(const double* pf);
96 inline double lengthsqr3(const double* pf);
97 inline double lengthsqr4(const double* pf);
98 
99 inline double* normalize2(double* pfout, const double* pf);
100 inline double* normalize3(double* pfout, const double* pf);
101 inline double* normalize4(double* pfout, const double* pf);
102 
103 inline double* cross3(double* pfout, const double* pf1, const double* pf2);
104 
105 // multiplies 3x3 matrices
106 inline double* mult3_s4(double* pfres, const double* pf1, const double* pf2);
107 inline double* mult3_s3(double* pfres, const double* pf1, const double* pf2);
108 
109 inline double* inv3(const double* pf, double* pfres, double* pfdet, int stride);
110 inline double* inv4(const double* pf, double* pfres);
111 
112 
114 // More complex ops that deal with arbitrary matrices //
116 
119 template <typename T>
120 inline bool eig2(const T* pfmat, T* peigs, T& fv1x, T& fv1y, T& fv2x, T& fv2y);
121 
122 // Simple routines for linear algebra algorithms //
123 
124 OPENRAVE_API int CubicRoots (double c0, double c1, double c2, double *r0, double *r1, double *r2);
125 template <typename T, typename S> void Tridiagonal3 (S* mat, T* diag, T* subd);
126 OPENRAVE_API bool QLAlgorithm3 (float* m_aafEntry, float* afDiag, float* afSubDiag);
127 OPENRAVE_API bool QLAlgorithm3 (double* m_aafEntry, double* afDiag, double* afSubDiag);
128 OPENRAVE_API void EigenSymmetric3(const double* fCovariance, double* eval, double* fAxes);
129 
133 template <typename T>
134 inline void GetCovarBasisVectors(const T fCovariance[3][3], T vbasis[3][3])
135 {
136  T EigenVals[3];
137  EigenSymmetric3((const T*)fCovariance, EigenVals, (T*)vbasis);
138  // make sure that the new axes follow the right-hand coord system
139  normalize3(vbasis[0],vbasis[0]);
140  T f = dot3(vbasis[0],vbasis[1]);
141  vbasis[1][0] -= vbasis[0][0]*f; vbasis[1][1] -= vbasis[0][1]*f; vbasis[1][2] -= vbasis[0][2]*f;
142  normalize3(vbasis[1],vbasis[1]);
143  cross3(vbasis[2],vbasis[0],vbasis[1]);
144 }
145 
152 template <typename T> inline void svd3(const T* A, T* U, T* D, T* V);
153 
154 template <typename T> inline void mult(T* pf, T fa, int r)
155 {
156  MATH_ASSERT( pf != NULL );
157  while(r > 0) {
158  --r;
159  pf[r] *= fa;
160  }
161 }
162 
163 template <typename T> int Min(T* pts, int stride, int numPts); // returns the index, stride in units of T
164 template <typename T> int Max(T* pts, int stride, int numPts); // returns the index
165 
166 // multiplies a matrix by a scalar
167 template <typename T> inline void mult(T* pf, T fa, int r);
168 
169 // multiplies a r1xc1 by c1xc2 matrix into pfres, if badd is true adds the result to pfres
170 // does not handle cases where pfres is equal to pf1 or pf2, use multtox for those cases
171 template <typename T, typename R, typename S>
172 inline S* mult(T* pf1, R* pf2, int r1, int c1, int c2, S* pfres, bool badd = false);
173 
174 // pf1 is transposed before mult
175 // rows of pf2 must equal rows of pf1
176 // pfres will be c1xc2 matrix
177 template <typename T, typename R, typename S>
178 inline S* multtrans(T* pf1, R* pf2, int r1, int c1, int c2, S* pfres, bool badd = false);
179 
180 // pf2 is transposed before mult
181 // the columns of both matrices must be the same and equal to c1
182 // r2 is the number of rows in pf2
183 // pfres must be an r1xr2 matrix
184 template <typename T, typename R, typename S>
185 inline S* multtrans_to2(T* pf1, R* pf2, int r1, int c1, int r2, S* pfres, bool badd = false);
186 
187 // multiplies rxc matrix pf1 and cxc matrix pf2 and stores the result in pf1,
188 // the function needs a temporary buffer the size of c doubles, if pftemp == NULL,
189 // the function will allocate the necessary memory, otherwise pftemp should be big
190 // enough to store all the entries
191 template <typename T> inline T* multto1(T* pf1, T* pf2, int r1, int c1, T* pftemp = NULL);
192 
193 // same as multto1 except stores the result in pf2, pf1 has to be an r2xr2 matrix
194 // pftemp must be of size r2 if not NULL
195 template <typename T, typename S> inline T* multto2(T* pf1, S* pf2, int r2, int c2, S* pftemp = NULL);
196 
197 // add pf1 + pf2 and store in pf1
198 template <typename T> inline void sub(T* pf1, T* pf2, int r);
199 template <typename T> inline T normsqr(const T* pf1, int r);
200 template <typename T> inline T lengthsqr(const T* pf1, const T* pf2, int length);
201 template <typename T> inline T dot(T* pf1, T* pf2, int length);
202 
203 template <typename T> inline T sum(T* pf, int length);
204 
206 template <typename T> inline bool inv2(T* pf, T* pfres);
207 
209 // Function Definitions
211 template <typename T>
212 bool eig2(const T* pfmat, T* peigs, T& fv1x, T& fv1y, T& fv2x, T& fv2y)
213 {
214  // x^2 + bx + c
215  T a, b, c, d;
216  b = -(pfmat[0] + pfmat[3]);
217  c = pfmat[0] * pfmat[3] - pfmat[1] * pfmat[2];
218  d = b * b - 4.0f * c + 1e-16f;
219  if( d < 0 ) {
220  return false;
221  }
222  if( d < 1e-16f ) {
223  a = -0.5f * b;
224  peigs[0] = a;
225  peigs[1] = a;
226  fv1x = pfmat[1];
227  fv1y = a - pfmat[0];
228  c = 1 / sqrt(fv1x*fv1x + fv1y*fv1y);
229  fv1x *= c;
230  fv1y *= c;
231  fv2x = -fv1y;
232  fv2y = fv1x;
233  return true;
234  }
235  // two roots
236  d = sqrt(d);
237  a = -0.5f * (b + d);
238  peigs[0] = a;
239  fv1x = pfmat[1];
240  fv1y = a-pfmat[0];
241  c = 1 / sqrt(fv1x*fv1x + fv1y*fv1y);
242  fv1x *= c;
243  fv1y *= c;
244  a += d;
245  peigs[1] = a;
246  fv2x = pfmat[1];
247  fv2y = a-pfmat[0];
248  c = 1 / sqrt(fv2x*fv2x + fv2y*fv2y);
249  fv2x *= c;
250  fv2y *= c;
251  return true;
252 }
253 
254 // returns the number of real roots, fills r1 and r2 with the answers
255 template <typename T>
256 inline int solvequad(T a, T b, T c, T& r1, T& r2)
257 {
258  T d = b * b - (T)4 * c * a + (T)1e-16;
259  if( d < 0 ) {
260  return 0;
261  }
262  if( d < (T)1e-16 ) {
263  r1 = r2 = (T)-0.5 * b / a;
264  return 1;
265  }
266  // two roots
267  d = sqrt(d);
268  r1 = (-b+d)/(2*a);
269  r2 = c/(a*r1);
270  return 2;
271 }
272 
273 #define MULT3(stride) { \
274  pfres2[0*stride+0] = pf1[0*stride+0]*pf2[0*stride+0]+pf1[0*stride+1]*pf2[1*stride+0]+pf1[0*stride+2]*pf2[2*stride+0]; \
275  pfres2[0*stride+1] = pf1[0*stride+0]*pf2[0*stride+1]+pf1[0*stride+1]*pf2[1*stride+1]+pf1[0*stride+2]*pf2[2*stride+1]; \
276  pfres2[0*stride+2] = pf1[0*stride+0]*pf2[0*stride+2]+pf1[0*stride+1]*pf2[1*stride+2]+pf1[0*stride+2]*pf2[2*stride+2]; \
277  pfres2[1*stride+0] = pf1[1*stride+0]*pf2[0*stride+0]+pf1[1*stride+1]*pf2[1*stride+0]+pf1[1*stride+2]*pf2[2*stride+0]; \
278  pfres2[1*stride+1] = pf1[1*stride+0]*pf2[0*stride+1]+pf1[1*stride+1]*pf2[1*stride+1]+pf1[1*stride+2]*pf2[2*stride+1]; \
279  pfres2[1*stride+2] = pf1[1*stride+0]*pf2[0*stride+2]+pf1[1*stride+1]*pf2[1*stride+2]+pf1[1*stride+2]*pf2[2*stride+2]; \
280  pfres2[2*stride+0] = pf1[2*stride+0]*pf2[0*stride+0]+pf1[2*stride+1]*pf2[1*stride+0]+pf1[2*stride+2]*pf2[2*stride+0]; \
281  pfres2[2*stride+1] = pf1[2*stride+0]*pf2[0*stride+1]+pf1[2*stride+1]*pf2[1*stride+1]+pf1[2*stride+2]*pf2[2*stride+1]; \
282  pfres2[2*stride+2] = pf1[2*stride+0]*pf2[0*stride+2]+pf1[2*stride+1]*pf2[1*stride+2]+pf1[2*stride+2]*pf2[2*stride+2]; \
283 }
284 
286 template <typename T>
287 inline T* _mult3_s4(T* pfres, const T* pf1, const T* pf2)
288 {
289  MATH_ASSERT( pf1 != NULL && pf2 != NULL && pfres != NULL );
290 
291  T* pfres2;
292  if((pfres == pf1)||(pfres == pf2)) pfres2 = (T*)alloca(12 * sizeof(T));
293  else pfres2 = pfres;
294 
295  MULT3(4);
296  if( pfres2 != pfres ) memcpy(pfres, pfres2, 12*sizeof(T));
297  return pfres;
298 }
299 
301 template <typename T>
302 inline T* _mult3_s3(T* pfres, const T* pf1, const T* pf2)
303 {
304  MATH_ASSERT( pf1 != NULL && pf2 != NULL && pfres != NULL );
305 
306  T* pfres2;
307  if((pfres == pf1)||(pfres == pf2)) pfres2 = (T*)alloca(9 * sizeof(T));
308  else pfres2 = pfres;
309 
310  MULT3(3);
311 
312  if( pfres2 != pfres ) memcpy(pfres, pfres2, 9*sizeof(T));
313 
314  return pfres;
315 }
316 
317 // mult4
318 template <typename T>
319 inline T* _mult4(T* pfres, const T* p1, const T* p2)
320 {
321  MATH_ASSERT( pfres != NULL && p1 != NULL && p2 != NULL );
322 
323  T* pfres2;
324  if((pfres == p1)||(pfres == p2)) pfres2 = (T*)alloca(16 * sizeof(T));
325  else pfres2 = pfres;
326 
327  pfres2[0*4+0] = p1[0*4+0]*p2[0*4+0] + p1[0*4+1]*p2[1*4+0] + p1[0*4+2]*p2[2*4+0] + p1[0*4+3]*p2[3*4+0];
328  pfres2[0*4+1] = p1[0*4+0]*p2[0*4+1] + p1[0*4+1]*p2[1*4+1] + p1[0*4+2]*p2[2*4+1] + p1[0*4+3]*p2[3*4+1];
329  pfres2[0*4+2] = p1[0*4+0]*p2[0*4+2] + p1[0*4+1]*p2[1*4+2] + p1[0*4+2]*p2[2*4+2] + p1[0*4+3]*p2[3*4+2];
330  pfres2[0*4+3] = p1[0*4+0]*p2[0*4+3] + p1[0*4+1]*p2[1*4+3] + p1[0*4+2]*p2[2*4+3] + p1[0*4+3]*p2[3*4+3];
331 
332  pfres2[1*4+0] = p1[1*4+0]*p2[0*4+0] + p1[1*4+1]*p2[1*4+0] + p1[1*4+2]*p2[2*4+0] + p1[1*4+3]*p2[3*4+0];
333  pfres2[1*4+1] = p1[1*4+0]*p2[0*4+1] + p1[1*4+1]*p2[1*4+1] + p1[1*4+2]*p2[2*4+1] + p1[1*4+3]*p2[3*4+1];
334  pfres2[1*4+2] = p1[1*4+0]*p2[0*4+2] + p1[1*4+1]*p2[1*4+2] + p1[1*4+2]*p2[2*4+2] + p1[1*4+3]*p2[3*4+2];
335  pfres2[1*4+3] = p1[1*4+0]*p2[0*4+3] + p1[1*4+1]*p2[1*4+3] + p1[1*4+2]*p2[2*4+3] + p1[1*4+3]*p2[3*4+3];
336 
337  pfres2[2*4+0] = p1[2*4+0]*p2[0*4+0] + p1[2*4+1]*p2[1*4+0] + p1[2*4+2]*p2[2*4+0] + p1[2*4+3]*p2[3*4+0];
338  pfres2[2*4+1] = p1[2*4+0]*p2[0*4+1] + p1[2*4+1]*p2[1*4+1] + p1[2*4+2]*p2[2*4+1] + p1[2*4+3]*p2[3*4+1];
339  pfres2[2*4+2] = p1[2*4+0]*p2[0*4+2] + p1[2*4+1]*p2[1*4+2] + p1[2*4+2]*p2[2*4+2] + p1[2*4+3]*p2[3*4+2];
340  pfres2[2*4+3] = p1[2*4+0]*p2[0*4+3] + p1[2*4+1]*p2[1*4+3] + p1[2*4+2]*p2[2*4+3] + p1[2*4+3]*p2[3*4+3];
341 
342  pfres2[3*4+0] = p1[3*4+0]*p2[0*4+0] + p1[3*4+1]*p2[1*4+0] + p1[3*4+2]*p2[2*4+0] + p1[3*4+3]*p2[3*4+0];
343  pfres2[3*4+1] = p1[3*4+0]*p2[0*4+1] + p1[3*4+1]*p2[1*4+1] + p1[3*4+2]*p2[2*4+1] + p1[3*4+3]*p2[3*4+1];
344  pfres2[3*4+2] = p1[3*4+0]*p2[0*4+2] + p1[3*4+1]*p2[1*4+2] + p1[3*4+2]*p2[2*4+2] + p1[3*4+3]*p2[3*4+2];
345  pfres2[3*4+3] = p1[3*4+0]*p2[0*4+3] + p1[3*4+1]*p2[1*4+3] + p1[3*4+2]*p2[2*4+3] + p1[3*4+3]*p2[3*4+3];
346 
347  if( pfres != pfres2 ) memcpy(pfres, pfres2, sizeof(T)*16);
348  return pfres;
349 }
350 
351 template <typename T>
352 inline T* _multtrans3(T* pfres, const T* pf1, const T* pf2)
353 {
354  T* pfres2;
355  if( pfres == pf1 ) pfres2 = (T*)alloca(9 * sizeof(T));
356  else pfres2 = pfres;
357 
358  pfres2[0] = pf1[0]*pf2[0]+pf1[3]*pf2[3]+pf1[6]*pf2[6];
359  pfres2[1] = pf1[0]*pf2[1]+pf1[3]*pf2[4]+pf1[6]*pf2[7];
360  pfres2[2] = pf1[0]*pf2[2]+pf1[3]*pf2[5]+pf1[6]*pf2[8];
361 
362  pfres2[3] = pf1[1]*pf2[0]+pf1[4]*pf2[3]+pf1[7]*pf2[6];
363  pfres2[4] = pf1[1]*pf2[1]+pf1[4]*pf2[4]+pf1[7]*pf2[7];
364  pfres2[5] = pf1[1]*pf2[2]+pf1[4]*pf2[5]+pf1[7]*pf2[8];
365 
366  pfres2[6] = pf1[2]*pf2[0]+pf1[5]*pf2[3]+pf1[8]*pf2[6];
367  pfres2[7] = pf1[2]*pf2[1]+pf1[5]*pf2[4]+pf1[8]*pf2[7];
368  pfres2[8] = pf1[2]*pf2[2]+pf1[5]*pf2[5]+pf1[8]*pf2[8];
369 
370  if( pfres2 != pfres ) memcpy(pfres, pfres2, 9*sizeof(T));
371 
372  return pfres;
373 }
374 
375 template <typename T>
376 inline T* _multtrans4(T* pfres, const T* pf1, const T* pf2)
377 {
378  T* pfres2;
379  if( pfres == pf1 ) pfres2 = (T*)alloca(16 * sizeof(T));
380  else pfres2 = pfres;
381 
382  for(int i = 0; i < 4; ++i) {
383  for(int j = 0; j < 4; ++j) {
384  pfres2[4*i+j] = pf1[i] * pf2[j] + pf1[i+4] * pf2[j+4] + pf1[i+8] * pf2[j+8] + pf1[i+12] * pf2[j+12];
385  }
386  }
387 
388  if( pfres2 != pfres ) memcpy(pfres, pfres2, 16*sizeof(T));
389 
390  return pfres;
391 }
392 
394 template <typename T> inline T matrixdet3(const T* pf, int stride)
395 {
396  return pf[0*stride+2] * (pf[1*stride + 0] * pf[2*stride + 1] - pf[1*stride + 1] * pf[2*stride + 0]) +
397  pf[1*stride+2] * (pf[0*stride + 1] * pf[2*stride + 0] - pf[0*stride + 0] * pf[2*stride + 1]) +
398  pf[2*stride+2] * (pf[0*stride + 0] * pf[1*stride + 1] - pf[0*stride + 1] * pf[1*stride + 0]);
399 }
400 
408 template <typename T>
409 inline T* _inv3(const T* pf, T* pfres, T* pfdet, int stride)
410 {
411  T* pfres2;
412  if( pfres == pf ) pfres2 = (T*)alloca(3 * stride * sizeof(T));
413  else pfres2 = pfres;
414 
415  // inverse = C^t / det(pf) where C is the matrix of coefficients
416 
417  // calc C^t
418  pfres2[0*stride + 0] = pf[1*stride + 1] * pf[2*stride + 2] - pf[1*stride + 2] * pf[2*stride + 1];
419  pfres2[0*stride + 1] = pf[0*stride + 2] * pf[2*stride + 1] - pf[0*stride + 1] * pf[2*stride + 2];
420  pfres2[0*stride + 2] = pf[0*stride + 1] * pf[1*stride + 2] - pf[0*stride + 2] * pf[1*stride + 1];
421  pfres2[1*stride + 0] = pf[1*stride + 2] * pf[2*stride + 0] - pf[1*stride + 0] * pf[2*stride + 2];
422  pfres2[1*stride + 1] = pf[0*stride + 0] * pf[2*stride + 2] - pf[0*stride + 2] * pf[2*stride + 0];
423  pfres2[1*stride + 2] = pf[0*stride + 2] * pf[1*stride + 0] - pf[0*stride + 0] * pf[1*stride + 2];
424  pfres2[2*stride + 0] = pf[1*stride + 0] * pf[2*stride + 1] - pf[1*stride + 1] * pf[2*stride + 0];
425  pfres2[2*stride + 1] = pf[0*stride + 1] * pf[2*stride + 0] - pf[0*stride + 0] * pf[2*stride + 1];
426  pfres2[2*stride + 2] = pf[0*stride + 0] * pf[1*stride + 1] - pf[0*stride + 1] * pf[1*stride + 0];
427 
428  T fdet = pf[0*stride + 2] * pfres2[2*stride + 0] + pf[1*stride + 2] * pfres2[2*stride + 1] +
429  pf[2*stride + 2] * pfres2[2*stride + 2];
430 
431  if( pfdet != NULL )
432  pfdet[0] = fdet;
433 
434  if( fabs(fdet) < 1e-12 ) {
435  return NULL;
436  }
437 
438  fdet = 1 / fdet;
439  //if( pfdet != NULL ) *pfdet = fdet;
440 
441  if( pfres != pf ) {
442  pfres[0*stride+0] *= fdet; pfres[0*stride+1] *= fdet; pfres[0*stride+2] *= fdet;
443  pfres[1*stride+0] *= fdet; pfres[1*stride+1] *= fdet; pfres[1*stride+2] *= fdet;
444  pfres[2*stride+0] *= fdet; pfres[2*stride+1] *= fdet; pfres[2*stride+2] *= fdet;
445  return pfres;
446  }
447 
448  pfres[0*stride+0] = pfres2[0*stride+0] * fdet;
449  pfres[0*stride+1] = pfres2[0*stride+1] * fdet;
450  pfres[0*stride+2] = pfres2[0*stride+2] * fdet;
451  pfres[1*stride+0] = pfres2[1*stride+0] * fdet;
452  pfres[1*stride+1] = pfres2[1*stride+1] * fdet;
453  pfres[1*stride+2] = pfres2[1*stride+2] * fdet;
454  pfres[2*stride+0] = pfres2[2*stride+0] * fdet;
455  pfres[2*stride+1] = pfres2[2*stride+1] * fdet;
456  pfres[2*stride+2] = pfres2[2*stride+2] * fdet;
457  return pfres;
458 }
459 
461 template <typename T>
462 inline T* _inv4(const T* pf, T* pfres)
463 {
464  T* pfres2;
465  if( pfres == pf ) pfres2 = (T*)alloca(16 * sizeof(T));
466  else pfres2 = pfres;
467 
468  // inverse = C^t / det(pf) where C is the matrix of coefficients
469 
470  // calc C^t
471 
472  // determinants of all possibel 2x2 submatrices formed by last two rows
473  T fd0, fd1, fd2;
474  T f1, f2, f3;
475  fd0 = pf[2*4 + 0] * pf[3*4 + 1] - pf[2*4 + 1] * pf[3*4 + 0];
476  fd1 = pf[2*4 + 1] * pf[3*4 + 2] - pf[2*4 + 2] * pf[3*4 + 1];
477  fd2 = pf[2*4 + 2] * pf[3*4 + 3] - pf[2*4 + 3] * pf[3*4 + 2];
478 
479  f1 = pf[2*4 + 1] * pf[3*4 + 3] - pf[2*4 + 3] * pf[3*4 + 1];
480  f2 = pf[2*4 + 0] * pf[3*4 + 3] - pf[2*4 + 3] * pf[3*4 + 0];
481  f3 = pf[2*4 + 0] * pf[3*4 + 2] - pf[2*4 + 2] * pf[3*4 + 0];
482 
483  pfres2[0*4 + 0] = pf[1*4 + 1] * fd2 - pf[1*4 + 2] * f1 + pf[1*4 + 3] * fd1;
484  pfres2[0*4 + 1] = -(pf[0*4 + 1] * fd2 - pf[0*4 + 2] * f1 + pf[0*4 + 3] * fd1);
485 
486  pfres2[1*4 + 0] = -(pf[1*4 + 0] * fd2 - pf[1*4 + 2] * f2 + pf[1*4 + 3] * f3);
487  pfres2[1*4 + 1] = pf[0*4 + 0] * fd2 - pf[0*4 + 2] * f2 + pf[0*4 + 3] * f3;
488 
489  pfres2[2*4 + 0] = pf[1*4 + 0] * f1 - pf[1*4 + 1] * f2 + pf[1*4 + 3] * fd0;
490  pfres2[2*4 + 1] = -(pf[0*4 + 0] * f1 - pf[0*4 + 1] * f2 + pf[0*4 + 3] * fd0);
491 
492  pfres2[3*4 + 0] = -(pf[1*4 + 0] * fd1 - pf[1*4 + 1] * f3 + pf[1*4 + 2] * fd0);
493  pfres2[3*4 + 1] = pf[0*4 + 0] * fd1 - pf[0*4 + 1] * f3 + pf[0*4 + 2] * fd0;
494 
495  // determinants of first 2 rows of 4x4 matrix
496  fd0 = pf[0*4 + 0] * pf[1*4 + 1] - pf[0*4 + 1] * pf[1*4 + 0];
497  fd1 = pf[0*4 + 1] * pf[1*4 + 2] - pf[0*4 + 2] * pf[1*4 + 1];
498  fd2 = pf[0*4 + 2] * pf[1*4 + 3] - pf[0*4 + 3] * pf[1*4 + 2];
499 
500  f1 = pf[0*4 + 1] * pf[1*4 + 3] - pf[0*4 + 3] * pf[1*4 + 1];
501  f2 = pf[0*4 + 0] * pf[1*4 + 3] - pf[0*4 + 3] * pf[1*4 + 0];
502  f3 = pf[0*4 + 0] * pf[1*4 + 2] - pf[0*4 + 2] * pf[1*4 + 0];
503 
504  pfres2[0*4 + 2] = pf[3*4 + 1] * fd2 - pf[3*4 + 2] * f1 + pf[3*4 + 3] * fd1;
505  pfres2[0*4 + 3] = -(pf[2*4 + 1] * fd2 - pf[2*4 + 2] * f1 + pf[2*4 + 3] * fd1);
506 
507  pfres2[1*4 + 2] = -(pf[3*4 + 0] * fd2 - pf[3*4 + 2] * f2 + pf[3*4 + 3] * f3);
508  pfres2[1*4 + 3] = pf[2*4 + 0] * fd2 - pf[2*4 + 2] * f2 + pf[2*4 + 3] * f3;
509 
510  pfres2[2*4 + 2] = pf[3*4 + 0] * f1 - pf[3*4 + 1] * f2 + pf[3*4 + 3] * fd0;
511  pfres2[2*4 + 3] = -(pf[2*4 + 0] * f1 - pf[2*4 + 1] * f2 + pf[2*4 + 3] * fd0);
512 
513  pfres2[3*4 + 2] = -(pf[3*4 + 0] * fd1 - pf[3*4 + 1] * f3 + pf[3*4 + 2] * fd0);
514  pfres2[3*4 + 3] = pf[2*4 + 0] * fd1 - pf[2*4 + 1] * f3 + pf[2*4 + 2] * fd0;
515 
516  T fdet = pf[0*4 + 3] * pfres2[3*4 + 0] + pf[1*4 + 3] * pfres2[3*4 + 1] +
517  pf[2*4 + 3] * pfres2[3*4 + 2] + pf[3*4 + 3] * pfres2[3*4 + 3];
518 
519  if( fabs(fdet) < 1e-9) return NULL;
520 
521  fdet = 1 / fdet;
522  //if( pfdet != NULL ) *pfdet = fdet;
523 
524  if( pfres2 == pfres ) {
525  mult(pfres, fdet, 16);
526  return pfres;
527  }
528 
529  int i = 0;
530  while(i < 16) {
531  pfres[i] = pfres2[i] * fdet;
532  ++i;
533  }
534 
535  return pfres;
536 }
537 
539 template <typename T>
540 inline T* _transpose3(const T* pf, T* pfres)
541 {
542  MATH_ASSERT( pf != NULL && pfres != NULL );
543 
544  if( pf == pfres ) {
545  std::swap(pfres[1], pfres[3]);
546  std::swap(pfres[2], pfres[6]);
547  std::swap(pfres[5], pfres[7]);
548  return pfres;
549  }
550 
551  pfres[0] = pf[0]; pfres[1] = pf[3]; pfres[2] = pf[6];
552  pfres[3] = pf[1]; pfres[4] = pf[4]; pfres[5] = pf[7];
553  pfres[6] = pf[2]; pfres[7] = pf[5]; pfres[8] = pf[8];
554 
555  return pfres;
556 }
557 
559 template <typename T>
560 inline T* _transpose4(const T* pf, T* pfres)
561 {
562  MATH_ASSERT( pf != NULL && pfres != NULL );
563 
564  if( pf == pfres ) {
565  std::swap(pfres[1], pfres[4]);
566  std::swap(pfres[2], pfres[8]);
567  std::swap(pfres[3], pfres[12]);
568  std::swap(pfres[6], pfres[9]);
569  std::swap(pfres[7], pfres[13]);
570  std::swap(pfres[11], pfres[15]);
571  return pfres;
572  }
573 
574  pfres[0] = pf[0]; pfres[1] = pf[4]; pfres[2] = pf[8]; pfres[3] = pf[12];
575  pfres[4] = pf[1]; pfres[5] = pf[5]; pfres[6] = pf[9]; pfres[7] = pf[13];
576  pfres[8] = pf[2]; pfres[9] = pf[6]; pfres[10] = pf[10]; pfres[11] = pf[14];
577  pfres[12] = pf[3]; pfres[13] = pf[7]; pfres[14] = pf[11]; pfres[15] = pf[15];
578  return pfres;
579 }
580 
581 template <typename T>
582 inline T _dot2(const T* pf1, const T* pf2)
583 {
584  MATH_ASSERT( pf1 != NULL && pf2 != NULL );
585  return pf1[0]*pf2[0] + pf1[1]*pf2[1];
586 }
587 
588 template <typename T>
589 inline T _dot3(const T* pf1, const T* pf2)
590 {
591  MATH_ASSERT( pf1 != NULL && pf2 != NULL );
592  return pf1[0]*pf2[0] + pf1[1]*pf2[1] + pf1[2]*pf2[2];
593 }
594 
595 template <typename T>
596 inline T _dot4(const T* pf1, const T* pf2)
597 {
598  MATH_ASSERT( pf1 != NULL && pf2 != NULL );
599  return pf1[0]*pf2[0] + pf1[1]*pf2[1] + pf1[2]*pf2[2] + pf1[3] * pf2[3];
600 }
601 
602 template <typename T>
603 inline T _lengthsqr2(const T* pf)
604 {
605  MATH_ASSERT( pf != NULL );
606  return pf[0] * pf[0] + pf[1] * pf[1];
607 }
608 
609 template <typename T>
610 inline T _lengthsqr3(const T* pf)
611 {
612  MATH_ASSERT( pf != NULL );
613  return pf[0] * pf[0] + pf[1] * pf[1] + pf[2] * pf[2];
614 }
615 
616 template <typename T>
617 inline T _lengthsqr4(const T* pf)
618 {
619  MATH_ASSERT( pf != NULL );
620  return pf[0] * pf[0] + pf[1] * pf[1] + pf[2] * pf[2] + pf[3] * pf[3];
621 }
622 
623 template <typename T>
624 inline T* _normalize2(T* pfout, const T* pf)
625 {
626  MATH_ASSERT(pf != NULL);
627 
628  T f = pf[0]*pf[0] + pf[1]*pf[1];
629  f = 1 / sqrt(f);
630  pfout[0] = pf[0] * f;
631  pfout[1] = pf[1] * f;
632 
633  return pfout;
634 }
635 
636 template <typename T>
637 inline T* _normalize3(T* pfout, const T* pf)
638 {
639  MATH_ASSERT(pf != NULL);
640 
641  T f = pf[0]*pf[0] + pf[1]*pf[1] + pf[2]*pf[2];
642 
643  f = 1 / sqrt(f);
644  pfout[0] = pf[0] * f;
645  pfout[1] = pf[1] * f;
646  pfout[2] = pf[2] * f;
647 
648  return pfout;
649 }
650 
651 template <typename T>
652 inline T* _normalize4(T* pfout, const T* pf)
653 {
654  MATH_ASSERT(pf != NULL);
655 
656  T f = pf[0]*pf[0] + pf[1]*pf[1] + pf[2]*pf[2] + pf[3]*pf[3];
657 
658  f = 1 / sqrt(f);
659  pfout[0] = pf[0] * f;
660  pfout[1] = pf[1] * f;
661  pfout[2] = pf[2] * f;
662  pfout[3] = pf[3] * f;
663 
664  return pfout;
665 }
666 
667 template <typename T>
668 inline T* _cross3(T* pfout, const T* pf1, const T* pf2)
669 {
670  MATH_ASSERT( pfout != NULL && pf1 != NULL && pf2 != NULL );
671 
672  T temp[3];
673  temp[0] = pf1[1] * pf2[2] - pf1[2] * pf2[1];
674  temp[1] = pf1[2] * pf2[0] - pf1[0] * pf2[2];
675  temp[2] = pf1[0] * pf2[1] - pf1[1] * pf2[0];
676 
677  pfout[0] = temp[0]; pfout[1] = temp[1]; pfout[2] = temp[2];
678  return pfout;
679 }
680 
681 template <typename T>
682 inline T* _transnorm3(T* pfout, const T* pfmat, const T* pf)
683 {
684  MATH_ASSERT( pfout != NULL && pf != NULL && pfmat != NULL );
685 
686  T dummy[3];
687  T* pfreal = (pfout == pf) ? dummy : pfout;
688 
689  pfreal[0] = pf[0] * pfmat[0] + pf[1] * pfmat[1] + pf[2] * pfmat[2];
690  pfreal[1] = pf[0] * pfmat[3] + pf[1] * pfmat[4] + pf[2] * pfmat[5];
691  pfreal[2] = pf[0] * pfmat[6] + pf[1] * pfmat[7] + pf[2] * pfmat[8];
692 
693  if( pfout ==pf ) {
694  pfout[0] = pfreal[0];
695  pfout[1] = pfreal[1];
696  pfout[2] = pfreal[2];
697  }
698 
699  return pfout;
700 }
701 
702 inline float* mult4(float* pfres, const float* pf1, const float* pf2) {
703  return _mult4<float>(pfres, pf1, pf2);
704 }
705 // pf1^T * pf2
706 inline float* multtrans3(float* pfres, const float* pf1, const float* pf2) {
707  return _multtrans3<float>(pfres, pf1, pf2);
708 }
709 inline float* multtrans4(float* pfres, const float* pf1, const float* pf2) {
710  return _multtrans4<float>(pfres, pf1, pf2);
711 }
712 inline float* transnorm3(float* pfout, const float* pfmat, const float* pf) {
713  return _transnorm3<float>(pfout, pfmat, pf);
714 }
715 
716 inline float* transpose3(const float* pf, float* pfres) {
717  return _transpose3<float>(pf, pfres);
718 }
719 inline float* transpose4(const float* pf, float* pfres) {
720  return _transpose4<float>(pf, pfres);
721 }
722 
723 inline float dot2(const float* pf1, const float* pf2) {
724  return _dot2<float>(pf1, pf2);
725 }
726 inline float dot3(const float* pf1, const float* pf2) {
727  return _dot3<float>(pf1, pf2);
728 }
729 inline float dot4(const float* pf1, const float* pf2) {
730  return _dot4<float>(pf1, pf2);
731 }
732 
733 inline float lengthsqr2(const float* pf) {
734  return _lengthsqr2<float>(pf);
735 }
736 inline float lengthsqr3(const float* pf) {
737  return _lengthsqr3<float>(pf);
738 }
739 inline float lengthsqr4(const float* pf) {
740  return _lengthsqr4<float>(pf);
741 }
742 
743 inline float* normalize2(float* pfout, const float* pf) {
744  return _normalize2<float>(pfout, pf);
745 }
746 inline float* normalize3(float* pfout, const float* pf) {
747  return _normalize3<float>(pfout, pf);
748 }
749 inline float* normalize4(float* pfout, const float* pf) {
750  return _normalize4<float>(pfout, pf);
751 }
752 
753 inline float* cross3(float* pfout, const float* pf1, const float* pf2) {
754  return _cross3<float>(pfout, pf1, pf2);
755 }
756 
757 // multiplies 3x3 matrices
758 inline float* mult3_s4(float* pfres, const float* pf1, const float* pf2) {
759  return _mult3_s4<float>(pfres, pf1, pf2);
760 }
761 inline float* mult3_s3(float* pfres, const float* pf1, const float* pf2) {
762  return _mult3_s3<float>(pfres, pf1, pf2);
763 }
764 
765 inline float* inv3(const float* pf, float* pfres, float* pfdet, int stride) {
766  return _inv3<float>(pf, pfres, pfdet, stride);
767 }
768 inline float* inv4(const float* pf, float* pfres) {
769  return _inv4<float>(pf, pfres);
770 }
771 
772 
773 inline double* mult4(double* pfres, const double* pf1, const double* pf2) {
774  return _mult4<double>(pfres, pf1, pf2);
775 }
776 // pf1^T * pf2
777 inline double* multtrans3(double* pfres, const double* pf1, const double* pf2) {
778  return _multtrans3<double>(pfres, pf1, pf2);
779 }
780 inline double* multtrans4(double* pfres, const double* pf1, const double* pf2) {
781  return _multtrans4<double>(pfres, pf1, pf2);
782 }
783 inline double* transnorm3(double* pfout, const double* pfmat, const double* pf) {
784  return _transnorm3<double>(pfout, pfmat, pf);
785 }
786 
787 inline double* transpose3(const double* pf, double* pfres) {
788  return _transpose3<double>(pf, pfres);
789 }
790 inline double* transpose4(const double* pf, double* pfres) {
791  return _transpose4<double>(pf, pfres);
792 }
793 
794 inline double dot2(const double* pf1, const double* pf2) {
795  return _dot2<double>(pf1, pf2);
796 }
797 inline double dot3(const double* pf1, const double* pf2) {
798  return _dot3<double>(pf1, pf2);
799 }
800 inline double dot4(const double* pf1, const double* pf2) {
801  return _dot4<double>(pf1, pf2);
802 }
803 
804 inline double lengthsqr2(const double* pf) {
805  return _lengthsqr2<double>(pf);
806 }
807 inline double lengthsqr3(const double* pf) {
808  return _lengthsqr3<double>(pf);
809 }
810 inline double lengthsqr4(const double* pf) {
811  return _lengthsqr4<double>(pf);
812 }
813 
814 inline double* normalize2(double* pfout, const double* pf) {
815  return _normalize2<double>(pfout, pf);
816 }
817 inline double* normalize3(double* pfout, const double* pf) {
818  return _normalize3<double>(pfout, pf);
819 }
820 inline double* normalize4(double* pfout, const double* pf) {
821  return _normalize4<double>(pfout, pf);
822 }
823 
824 inline double* cross3(double* pfout, const double* pf1, const double* pf2) {
825  return _cross3<double>(pfout, pf1, pf2);
826 }
827 
828 // multiplies 3x3 matrices
829 inline double* mult3_s4(double* pfres, const double* pf1, const double* pf2) {
830  return _mult3_s4<double>(pfres, pf1, pf2);
831 }
832 inline double* mult3_s3(double* pfres, const double* pf1, const double* pf2) {
833  return _mult3_s3<double>(pfres, pf1, pf2);
834 }
835 
836 inline double* inv3(const double* pf, double* pfres, double* pfdet, int stride) {
837  return _inv3<double>(pf, pfres, pfdet, stride);
838 }
839 inline double* inv4(const double* pf, double* pfres) {
840  return _inv4<double>(pf, pfres);
841 }
842 
843 template <typename T, typename R, typename S>
844 inline S* mult(T* pf1, R* pf2, int r1, int c1, int c2, S* pfres, bool badd)
845 {
846  MATH_ASSERT( pf1 != NULL && pf2 != NULL && pfres != NULL);
847  int j, k;
848 
849  if( !badd ) memset(pfres, 0, sizeof(S) * r1 * c2);
850 
851  while(r1 > 0) {
852  --r1;
853 
854  j = 0;
855  while(j < c2) {
856  k = 0;
857  while(k < c1) {
858  pfres[j] += (S)(pf1[k] * pf2[k*c2 + j]);
859  ++k;
860  }
861  ++j;
862  }
863 
864  pf1 += c1;
865  pfres += c2;
866  }
867 
868  return pfres;
869 }
870 
871 template <typename T, typename R, typename S>
872 inline S* multtrans(T* pf1, R* pf2, int r1, int c1, int c2, S* pfres, bool badd)
873 {
874  MATH_ASSERT( pf1 != NULL && pf2 != NULL && pfres != NULL);
875  int i, j, k;
876 
877  if( !badd ) memset(pfres, 0, sizeof(S) * c1 * c2);
878 
879  i = 0;
880  while(i < c1) {
881 
882  j = 0;
883  while(j < c2) {
884 
885  k = 0;
886  while(k < r1) {
887  pfres[j] += (S)(pf1[k*c1] * pf2[k*c2 + j]);
888  ++k;
889  }
890  ++j;
891  }
892 
893  pfres += c2;
894  ++pf1;
895 
896  ++i;
897  }
898 
899  return pfres;
900 }
901 
902 template <typename T, typename R, typename S>
903 inline S* multtrans_to2(T* pf1, R* pf2, int r1, int c1, int r2, S* pfres, bool badd)
904 {
905  MATH_ASSERT( pf1 != NULL && pf2 != NULL && pfres != NULL);
906  int j, k;
907 
908  if( !badd ) memset(pfres, 0, sizeof(S) * r1 * r2);
909 
910  while(r1 > 0) {
911  --r1;
912 
913  j = 0;
914  while(j < r2) {
915  k = 0;
916  while(k < c1) {
917  pfres[j] += (S)(pf1[k] * pf2[j*c1 + k]);
918  ++k;
919  }
920  ++j;
921  }
922 
923  pf1 += c1;
924  pfres += r2;
925  }
926 
927  return pfres;
928 }
929 
930 template <typename T> inline T* multto1(T* pf1, T* pf2, int r, int c, T* pftemp)
931 {
932  MATH_ASSERT( pf1 != NULL && pf2 != NULL );
933 
934  int j, k;
935  bool bdel = false;
936 
937  if( pftemp == NULL ) {
938  pftemp = new T[c];
939  bdel = true;
940  }
941 
942  while(r > 0) {
943  --r;
944 
945  j = 0;
946  while(j < c) {
947 
948  pftemp[j] = 0.0;
949 
950  k = 0;
951  while(k < c) {
952  pftemp[j] += pf1[k] * pf2[k*c + j];
953  ++k;
954  }
955  ++j;
956  }
957 
958  memcpy(pf1, pftemp, c * sizeof(T));
959  pf1 += c;
960  }
961 
962  if( bdel ) delete[] pftemp;
963 
964  return pf1;
965 }
966 
967 template <typename T, typename S> inline T* multto2(T* pf1, S* pf2, int r2, int c2, S* pftemp)
968 {
969  MATH_ASSERT( pf1 != NULL && pf2 != NULL );
970 
971  int i, j, k;
972  bool bdel = false;
973 
974  if( pftemp == NULL ) {
975  pftemp = new S[r2];
976  bdel = true;
977  }
978 
979  // do columns first
980  j = 0;
981  while(j < c2) {
982  i = 0;
983  while(i < r2) {
984 
985  pftemp[i] = 0.0;
986 
987  k = 0;
988  while(k < r2) {
989  pftemp[i] += pf1[i*r2 + k] * pf2[k*c2 + j];
990  ++k;
991  }
992  ++i;
993  }
994 
995  i = 0;
996  while(i < r2) {
997  *(pf2+i*c2+j) = pftemp[i];
998  ++i;
999  }
1000 
1001  ++j;
1002  }
1003 
1004  if( bdel ) delete[] pftemp;
1005 
1006  return pf1;
1007 }
1008 
1009 template <typename T> inline void add(T* pf1, T* pf2, int r)
1010 {
1011  MATH_ASSERT( pf1 != NULL && pf2 != NULL);
1012 
1013  while(r > 0) {
1014  --r;
1015  pf1[r] += pf2[r];
1016  }
1017 }
1018 
1019 template <typename T> inline void sub(T* pf1, T* pf2, int r)
1020 {
1021  MATH_ASSERT( pf1 != NULL && pf2 != NULL);
1022 
1023  while(r > 0) {
1024  --r;
1025  pf1[r] -= pf2[r];
1026  }
1027 }
1028 
1029 template <typename T> inline T normsqr(const T* pf1, int r)
1030 {
1031  MATH_ASSERT( pf1 != NULL );
1032 
1033  T d = 0.0;
1034  while(r > 0) {
1035  --r;
1036  d += pf1[r] * pf1[r];
1037  }
1038 
1039  return d;
1040 }
1041 
1042 template <typename T> inline T lengthsqr(const T* pf1, const T* pf2, int length)
1043 {
1044  T d = 0;
1045  while(length > 0) {
1046  --length;
1047  T t = pf1[length] - pf2[length];
1048  d += t * t;
1049  }
1050 
1051  return d;
1052 }
1053 
1054 template <typename T> inline T dot(T* pf1, T* pf2, int length)
1055 {
1056  T d = 0;
1057  while(length > 0) {
1058  --length;
1059  d += pf1[length] * pf2[length];
1060  }
1061 
1062  return d;
1063 }
1064 
1065 template <typename T> inline T sum(T* pf, int length)
1066 {
1067  T d = 0;
1068  while(length > 0) {
1069  --length;
1070  d += pf[length];
1071  }
1072 
1073  return d;
1074 }
1075 
1076 template <typename T> inline bool inv2(T* pf, T* pfres)
1077 {
1078  T fdet = pf[0] * pf[3] - pf[1] * pf[2];
1079 
1080  if( fabs(fdet) < 1e-16 ) return false;
1081 
1082  fdet = 1 / fdet;
1083  //if( pfdet != NULL ) *pfdet = fdet;
1084 
1085  if( pfres != pf ) {
1086  pfres[0] = fdet * pf[3]; pfres[1] = -fdet * pf[1];
1087  pfres[2] = -fdet * pf[2]; pfres[3] = fdet * pf[0];
1088  return true;
1089  }
1090 
1091  T ftemp = pf[0];
1092  pfres[0] = pf[3] * fdet;
1093  pfres[1] *= -fdet;
1094  pfres[2] *= -fdet;
1095  pfres[3] = ftemp * pf[0];
1096 
1097  return true;
1098 }
1099 
1100 template <typename T, typename S>
1101 void Tridiagonal3 (S* mat, T* diag, T* subd)
1102 {
1103  T a, b, c, d, e, f, ell, q;
1104 
1105  a = mat[0*3+0];
1106  b = mat[0*3+1];
1107  c = mat[0*3+2];
1108  d = mat[1*3+1];
1109  e = mat[1*3+2];
1110  f = mat[2*3+2];
1111 
1112  subd[2] = 0.0;
1113  diag[0] = a;
1114  if ( fabs(c) >= g_fEpsilon ) {
1115  ell = (T)sqrt(b*b+c*c);
1116  b /= ell;
1117  c /= ell;
1118  q = 2*b*e+c*(f-d);
1119  diag[1] = d+c*q;
1120  diag[2] = f-c*q;
1121  subd[0] = ell;
1122  subd[1] = e-b*q;
1123  mat[0*3+0] = (S)1; mat[0*3+1] = (S)0; mat[0*3+2] = (T)0;
1124  mat[1*3+0] = (S)0; mat[1*3+1] = b; mat[1*3+2] = c;
1125  mat[2*3+0] = (S)0; mat[2*3+1] = c; mat[2*3+2] = -b;
1126  }
1127  else {
1128  diag[1] = d;
1129  diag[2] = f;
1130  subd[0] = b;
1131  subd[1] = e;
1132  mat[0*3+0] = (S)1; mat[0*3+1] = (S)0; mat[0*3+2] = (S)0;
1133  mat[1*3+0] = (S)0; mat[1*3+1] = (S)1; mat[1*3+2] = (S)0;
1134  mat[2*3+0] = (S)0; mat[2*3+1] = (S)0; mat[2*3+2] = (S)1;
1135  }
1136 }
1137 
1138 template <typename T>
1139 inline void svd3(const T* A, T* U, T* D, T* V)
1140 {
1141  T VVt[9];
1142  T eigenvalues[3];
1143  multtrans3(VVt, A, A);
1144  // get eigen values of V: VVt V = V D^2
1145  T afSubDiag[3];
1146  std::copy(&VVt,&VVt[9],&V[0]);
1147  Tridiagonal3(V,eigenvalues,afSubDiag);
1148  QLAlgorithm3(V,eigenvalues,afSubDiag);
1149 
1150  //float fDet = V[0*3+0] * (V[1*3+1] * V[2*3+2] - V[1*3+2] * V[2*3+1]) +
1151  // V[0*3+1] * (V[1*3+2] * V[2*3+0] - V[1*3+0] * V[2*3+2]) +
1152  // V[0*3+2] * (V[1*3+0] * V[2*3+1] - V[1*3+1] * V[2*3+0]);
1153  //
1154  // if ( fDet < 0.0f ) {
1155  // V[0*3+2] = - V[0*3+2];
1156  // V[1*3+2] = - V[1*3+2];
1157  // V[2*3+2] = - V[2*3+2];
1158  // }
1159 
1160  mult3_s3(U, A, V); // U = A V = U D
1161  for(int i = 0; i < 3; ++i) {
1162  D[i] = sqrt(eigenvalues[i]);
1163  T f = 1/D[i];
1164  U[i] *= f;
1165  U[i+3] *= f;
1166  U[i+6] *= f;
1167  }
1168  int maxval = 0;
1169  if( D[1] > D[maxval] ) {
1170  maxval = 1;
1171  }
1172  if( D[2] > D[maxval] ) {
1173  maxval = 2;
1174  }
1175  if( maxval > 0 ) {
1176  // flip columns
1177  swap(U[0], U[maxval]);
1178  swap(U[3], U[3+maxval]);
1179  swap(U[6], U[6+maxval]);
1180  swap(V[0], V[maxval]);
1181  swap(V[3], V[3+maxval]);
1182  swap(V[6], V[6+maxval]);
1183  swap(D[0], D[maxval]);
1184  }
1185  if( D[1] < D[2] ) {
1186  swap(U[1], U[2]);
1187  swap(U[4], U[5]);
1188  swap(U[7], U[8]);
1189  swap(V[1], V[2]);
1190  swap(V[4], V[5]);
1191  swap(V[7], V[8]);
1192  swap(D[1], D[2]);
1193  }
1194 }
1195 
1196 template <typename T>
1197 int Min(T* pts, int stride, int numPts)
1198 {
1199  MATH_ASSERT( pts != NULL && numPts > 0 && stride > 0 );
1200 
1201  int best = pts[0]; pts += stride;
1202  for(int i = 1; i < numPts; ++i, pts += stride) {
1203  if( best > pts[0] )
1204  best = pts[0];
1205  }
1206 
1207  return best;
1208 }
1209 
1210 template <typename T>
1211 int Max(T* pts, int stride, int numPts)
1212 {
1213  MATH_ASSERT( pts != NULL && numPts > 0 && stride > 0 );
1214 
1215  int best = pts[0]; pts += stride;
1216  for(int i = 1; i < numPts; ++i, pts += stride) {
1217  if( best < pts[0] )
1218  best = pts[0];
1219  }
1220 
1221  return best;
1222 }
1223 
1224 } // end namespace mathextra
1225 } // end namespace OpenRAVE
1226 
1227 #endif