amino
Lightweight Robot Utility Library
gcc.h
/* -*- mode: C; c-basic-offset: 4 -*- */
/* ex: set shiftwidth=4 tabstop=4 expandtab: */
/*
 * Copyright (c) 2013, Georgia Tech Research Corporation
 * All rights reserved.
 *
 * Author(s): Neil T. Dantam <ntd@gatech.edu>
 * Georgia Tech Humanoid Robotics Lab
 * Under Direction of Prof. Mike Stilman <mstilman@cc.gatech.edu>
 *
 *
 * This file is provided under the following "BSD-style" License:
 *
 *
 * Redistribution and use in source and binary forms, with or
 * without modification, are permitted provided that the following
 * conditions are met:
 *
 * * Redistributions of source code must retain the above copyright
 * notice, this list of conditions and the following disclaimer.
 *
 * * Redistributions in binary form must reproduce the above
 * copyright notice, this list of conditions and the following
 * disclaimer in the documentation and/or other materials provided
 * with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
 * CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
 * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR
 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
 * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
 * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 *
 */

#ifndef AA_AMINO_ARCH_GCC_H
#define AA_AMINO_ARCH_GCC_H

#include <stdint.h>

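/* SIMD vector types built on GCC's vector extensions: two- and
 * four-element double vectors, plus matching int64_t vectors used as
 * index masks for __builtin_shuffle(). */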
typedef double aa_vec_2d __attribute__ ((vector_size (16), aligned(16)));
typedef double aa_vec_4d __attribute__ ((vector_size (32), aligned(32)));
typedef int64_t aa_vec_4d_size __attribute__ ((vector_size (32)));
typedef int64_t aa_vec_2d_size __attribute__ ((vector_size (16)));

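/** Permute the lanes of a: lane k of the result is a[ik]. */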
static inline aa_vec_4d
aa_vec_4d_shuffle( aa_vec_4d a,
                   int64_t i0, int64_t i1, int64_t i2, int64_t i3 ) {
    aa_vec_4d_size m = {i0,i1,i2,i3};
    return __builtin_shuffle(a,m);
}

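/** Permute the lanes of a two-element vector. */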
static inline aa_vec_2d
aa_vec_2d_shuffle( aa_vec_2d a,
                   int64_t i0, int64_t i1 ) {
    aa_vec_2d_size m = {i0,i1};
    return __builtin_shuffle(a,m);
}

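/** Swap the two lanes of a. */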
static inline aa_vec_2d
aa_vec_2d_swap( aa_vec_2d a ) {
    return aa_vec_2d_shuffle(a, 1, 0 );
}

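/** Shuffle lanes drawn from the concatenation of a and b
 *  (indices 0-3 select from a, 4-7 from b). */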
static inline aa_vec_4d
aa_vec_4d_shuffle2( aa_vec_4d a, aa_vec_4d b,
                    int64_t i0, int64_t i1, int64_t i2, int64_t i3 ) {
    aa_vec_4d_size m = {i0,i1,i2,i3};
    return __builtin_shuffle(a,b,m);
}

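/** Load four doubles into a vector.  Dereferences an aa_vec_4d
 *  pointer, so src must be 32-byte aligned. */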
static inline aa_vec_4d
aa_vec_4d_ld( const double src[4] ) {
    return *(aa_vec_4d*)src;
}

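/** Store a vector to four doubles; dst must be 32-byte aligned. */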
static inline void
aa_vec_4d_st( double dst[4], const aa_vec_4d src ) {
    *(aa_vec_4d*)dst = src;
}

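/** Load two doubles into a vector; src must be 16-byte aligned. */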
static inline aa_vec_2d
aa_vec_2d_ld( const double src[2] ) {
    return *(aa_vec_2d*)src;
}

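/** Store a vector to two doubles; dst must be 16-byte aligned. */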
static inline void
aa_vec_2d_st( double dst[2], const aa_vec_2d src ) {
    *(aa_vec_2d*)dst = src;
}

#ifdef __AVX__

#include "amino/arch/avx.h"

#else // generic load/store

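/** Load three doubles into a 4d vector; the fourth lane is
 *  zero-initialized. */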
static inline aa_vec_4d
aa_vec_3d_ld( const double src[3] ) {
    aa_vec_4d dst = {src[0], src[1], src[2]};
    return dst;
}

#endif

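/** Store the first three lanes of src into dst. */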
static inline void
aa_vec_3d_st( double dst[3], const aa_vec_4d src ) {
    dst[0] = src[0];
    dst[1] = src[1];
    dst[2] = src[2];
}

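/** Four-element dot product: sum of a[i]*b[i]. */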
static inline double
aa_vec_4d_dot( const aa_vec_4d a, const aa_vec_4d b ) {
    aa_vec_4d sq = a*b;
    aa_vec_2d y = {sq[2], sq[3]};
    aa_vec_2d x = {sq[0], sq[1]};
    aa_vec_2d z = x+y;
    return z[0] + z[1];
}

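/** Cross product of the first three lanes of a and b; lane 3 of the
 *  result is zero. */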
static inline aa_vec_4d
aa_vec_cross( const aa_vec_4d a, const aa_vec_4d b ) {
    aa_vec_4d tmp = ( a * aa_vec_4d_shuffle(b, 1,2,0,3) -
                      aa_vec_4d_shuffle(a, 1,2,0,3) * b );

    return aa_vec_4d_shuffle(tmp, 1,2,0,3);
}

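/** Cross product on memory operands: c = a x b.
 *  AA_RESTRICT expands to the restrict qualifier appropriate to the
 *  language flavor; it is defined in amino.h. */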
void aa_vecm_cross( const double a[AA_RESTRICT 3], const double b[AA_RESTRICT 3],
                    double c[AA_RESTRICT 3] );

/*---- QUATERNIONS ----*/

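/** Quaternion conjugate: negate the vector part, keep the scalar.
 *  Quaternions are stored xyzw, scalar in lane 3. */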
static inline aa_vec_4d
aa_vec_qconj( const aa_vec_4d q ) {
    aa_vec_4d c = -q;
    c[3] *= -1;
    return c;
}

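/** Quaternion multiplication: c = a * b. */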
static inline aa_vec_4d
aa_vec_qmul( const aa_vec_4d a, const aa_vec_4d b ) {
    aa_vec_4d vc;
    vc = ( aa_vec_4d_shuffle( a, 0,2,3,1 ) * aa_vec_4d_shuffle( b, 3,0,2,1) +
           aa_vec_4d_shuffle( a, 1,3,2,0 ) * aa_vec_4d_shuffle( b, 2,1,3,0) +
           aa_vec_4d_shuffle( a, 3,1,0,2 ) * aa_vec_4d_shuffle( b, 0,3,1,2) -
           aa_vec_4d_shuffle( a, 2,0,1,3 ) * aa_vec_4d_shuffle( b, 1,2,0,3) );

    vc[3] = -vc[3];

    return vc;
}

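/** Quaternion multiply with b and the result split into 2d halves
 *  ({x,y} and {z,w} pairs); ax..aw are the scalar (or two-lane
 *  broadcast) components of a. */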
#define AA_VEC_QMUL_2DB( ax, ay, az, aw, bxy, bzw, rxy, rzw ) {  \
        aa_vec_2d aa_vec_tmp;                                    \
        aa_vec_tmp = ax*bzw - az*bxy;                            \
        aa_vec_tmp[0] = -aa_vec_tmp[0];                          \
        rxy = ay*bzw + aw*bxy + aa_vec_2d_swap(aa_vec_tmp);      \
        aa_vec_tmp = ax*bxy + az*bzw;                            \
        aa_vec_tmp[0] = -aa_vec_tmp[0];                          \
        rzw = aw*bzw - ay*bxy + aa_vec_2d_swap(aa_vec_tmp);      \
    }

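/** Multiply the pure quaternion (v, 0) by quaternion q: v "*" q.
 *  Lane 3 of v is ignored. */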
static inline aa_vec_4d
aa_vec_vqmul( const aa_vec_4d v, const aa_vec_4d q ) {
    aa_vec_4d t = aa_vec_4d_shuffle(v, 2,0,1,1);
    t[3] = -v[2];

    aa_vec_4d y;
    y  = aa_vec_4d_shuffle(v, 1,2,0,0) * aa_vec_4d_shuffle(q, 2,0,1,0);
    y += aa_vec_4d_shuffle(v, 0,1,2,1) * aa_vec_4d_shuffle(q, 3,3,3,1);
    y -= t * aa_vec_4d_shuffle(q, 1,2,0,2);
    y[3] = -y[3];
    return y;
}

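/** Quaternion multiply on memory operands: c = a * b. */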
void aa_vecm_qmul( const double a[AA_RESTRICT 4], const double b[AA_RESTRICT 4],
                   double c[AA_RESTRICT 4] );

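/** Rotate vector v by unit quaternion q:
 *  v' = v + 2 q_v x (q_v x v + w v). */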
static inline aa_vec_4d
aa_vec_qrot( const aa_vec_4d q, const aa_vec_4d v ) {
    aa_vec_4d a = aa_vec_cross(q,v) + q[3]*v;
    aa_vec_4d b = aa_vec_cross(q,a);
    return 2 * b + v;
}

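/** Rotate vector v by unit quaternion q, memory operands. */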
void aa_vecm_qrot( const double q[AA_RESTRICT 4], const double v[AA_RESTRICT 3],
                   double p[AA_RESTRICT 3] );

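/* Usage sketch: rotate a point by a unit quaternion (xyzw, scalar
 * last).  aa_vec_4d_ld() dereferences an aligned vector pointer, so
 * the source array must be 32-byte aligned:
 *
 *     double q[4] __attribute__((aligned(32))) = {0, 0, 0, 1};
 *     double p[3] = {1, 2, 3};
 *     double r[3];
 *     aa_vec_3d_st( r, aa_vec_qrot( aa_vec_4d_ld(q), aa_vec_3d_ld(p) ) );
 */
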
/*---- QUATERNION-VECTOR ----*/

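/** Transform point p by the quaternion-vector pose (q,v):
 *  rotate by q, then translate by v. */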
static inline aa_vec_4d
aa_vec_qv_tf( const aa_vec_4d q, const aa_vec_4d v, const aa_vec_4d p )
{
    return aa_vec_qrot(q, p) + v;
}

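/** Compose quaternion-vector poses: (qr,vr) = (q0,v0) * (q1,v1). */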
#define AA_VEC_QV_MUL( q0, v0, q1, v1, qr, vr ) {  \
        qr = aa_vec_qmul(q0,q1);                   \
        vr = aa_vec_qv_tf(q0, v0, v1);             \
    }

/*---- MATRICES ----*/

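/** Load the three columns of a column-major rotation matrix from a
 *  9-element array. */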
#define AA_VEC_ROTMAT_LD( R0, R1, R2, ptr ) {  \
        R0 = aa_vec_3d_ld(ptr);                \
        R1 = aa_vec_3d_ld(ptr+3);              \
        R2 = aa_vec_3d_ld(ptr+6);              \
    }

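/** Store rotation-matrix columns into the first nine entries of a
 *  12-element transform array. */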
static inline void
aa_vec_rotmat_st( double T[AA_RESTRICT 12],
                  aa_vec_4d col0, aa_vec_4d col1, aa_vec_4d col2 )
{
    T[0] = col0[0];
    T[1] = col0[1];
    T[2] = col0[2];
    T[3] = col1[0];
    T[4] = col1[1];
    T[5] = col1[2];
    T[6] = col2[0];
    T[7] = col2[1];
    T[8] = col2[2];
}

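/** Rotate point p by the matrix with columns R0, R1, R2: R*p. */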
static inline aa_vec_4d
aa_vec_rotmat_tf( const aa_vec_4d R0, const aa_vec_4d R1, const aa_vec_4d R2, const aa_vec_4d p )
{
    return R0*p[0] + R1*p[1] + R2*p[2];
}

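/** Load the four columns of a 3x4 transform matrix from a 12-element
 *  array. */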
#define AA_VEC_TFMAT_LD( col0, col1, col2, col3, T ) {  \
        col0[0] = T[0];                                 \
        col0[1] = T[1];                                 \
        col0[2] = T[2];                                 \
        col1[0] = T[3];                                 \
        col1[1] = T[4];                                 \
        col1[2] = T[5];                                 \
        col2[0] = T[6];                                 \
        col2[1] = T[7];                                 \
        col2[2] = T[8];                                 \
        col3[0] = T[9];                                 \
        col3[1] = T[10];                                \
        col3[2] = T[11];                                \
    }

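/** Store four transform-matrix columns into a 12-element array. */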
static inline void
aa_vec_tfmat_st( double T[AA_RESTRICT 12],
                 aa_vec_4d col0, aa_vec_4d col1, aa_vec_4d col2, aa_vec_4d col3 ) {
    T[0]  = col0[0];
    T[1]  = col0[1];
    T[2]  = col0[2];
    T[3]  = col1[0];
    T[4]  = col1[1];
    T[5]  = col1[2];
    T[6]  = col2[0];
    T[7]  = col2[1];
    T[8]  = col2[2];
    T[9]  = col3[0];
    T[10] = col3[1];
    T[11] = col3[2];
}

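/** Transform point p: rotate by columns T0..T2, then translate
 *  by T3. */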
static inline aa_vec_4d
aa_vec_tfmat_tf( const aa_vec_4d T0, const aa_vec_4d T1, const aa_vec_4d T2, const aa_vec_4d T3,
                 const aa_vec_4d p )
{
    return aa_vec_rotmat_tf(T0, T1, T2, p) + T3;
}

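/** Multiply the transform with columns T0c0..T0c3 by the transform
 *  stored in the 12-element array T1, producing columns Uc0..Uc3. */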
#define AA_VEC_TFMUL( T0c0, T0c1, T0c2, T0c3, T1, Uc0, Uc1, Uc2, Uc3 ) {  \
        Uc0 = T0c0*T1[0] + T0c1*T1[1] + T0c2*T1[2];                       \
        Uc1 = T0c0*T1[3] + T0c1*T1[4] + T0c2*T1[5];                       \
        Uc2 = T0c0*T1[6] + T0c1*T1[7] + T0c2*T1[8];                       \
        Uc3 = T0c0*T1[9] + T0c1*T1[10] + T0c2*T1[11] + T0c3;              \
    }

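/** Multiply transform matrices, memory operands: U = T0 * T1. */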
void aa_vecm_tfmul( const double T0[AA_RESTRICT 12], const double T1[AA_RESTRICT 12],
                    double U[AA_RESTRICT 12] );

/*---- DUAL-QUATERNIONS ----*/

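/** Dual quaternion multiply: real part d2r = d0r*d1r, dual part
 *  d2d = d0r*d1d + d0d*d1r. */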
#define AA_VEC_DUQU_MUL( d0r, d0d, d1r, d1d, d2r, d2d ) {           \
        d2r = aa_vec_qmul( d0r, d1r );                              \
        d2d = aa_vec_qmul( d0r, d1d ) + aa_vec_qmul( d0d, d1r );    \
    }

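/** Dual quaternion multiply, memory operands: d2 = d0 * d1. */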
void aa_vecm_duqu_mul( const double d0[AA_RESTRICT 8], const double d1[AA_RESTRICT 8],
                       double d2[AA_RESTRICT 8] );

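/** Translation part of dual quaternion (r, d): 2 * d * conj(r). */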
static inline aa_vec_4d
aa_vec_duqu_trans( aa_vec_4d r, aa_vec_4d d ) {
    return 2 * aa_vec_qmul( d, aa_vec_qconj(r));
}

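/** Dual part for converting a rotation quaternion and translation to
 *  a dual quaternion. */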
static inline aa_vec_4d
aa_vec_qv2duqu_dual( aa_vec_4d r, aa_vec_4d d ) {
    return aa_vec_vqmul( aa_vec_qconj(r), d ) / 2;
}

#endif //AA_AMINO_ARCH_GCC_H