1 #include<DD-AVX_internal.hpp>
2 using namespace ddavx_core;
9 std::cerr <<
"error bad vector size" << std::endl;
14 #pragma omp parallel private(regs)
17 get_isie(y.
size(), is, ie);
18 reg alpha_hi = broadcast(alpha.x[0]);
19 reg alpha_lo = broadcast(alpha.x[1]);
20 for(i = is; i < (ie-SIMD_Length+1); i += SIMD_Length){
22 reg x_hi = load(x.
hi[i]);
23 reg x_lo = load(x.
lo[i]);
25 reg y_hi = load(y.
hi[i]);
26 reg y_lo = load(y.
lo[i]);
28 Fma(y_hi, y_lo, x_hi, x_lo, alpha_hi, alpha_lo, y_hi, y_lo, regs);
34 Fma(y.
hi[i], y.
lo[i], x.
hi[i], x.
lo[i], alpha.x[0], alpha.x[1], y.
hi[i], y.
lo[i]);
40 if(x.size() != y.
size()){
41 std::cerr <<
"error bad vector size" << std::endl;
46 #pragma omp parallel private(regs)
49 get_isie(y.
size(), is, ie);
50 reg alpha_hi = broadcast(alpha.x[0]);
51 reg alpha_lo = broadcast(alpha.x[1]);
52 for(i = is; i < (ie-SIMD_Length+1); i += SIMD_Length){
54 reg x_hi = load(x.data()[i]);
55 reg x_lo = regs.zeros;
57 reg y_hi = load(y.
hi[i]);
58 reg y_lo = load(y.
lo[i]);
60 Fma(y_hi, y_lo, x_hi, x_lo, alpha_hi, alpha_lo, y_hi, y_lo, regs);
66 Fma(y.
hi[i], y.
lo[i], x.data()[i], 0.0, alpha.x[0], alpha.x[1], y.
hi[i], y.
lo[i]);
72 if(x.
size() != y.size()){
73 std::cerr <<
"error bad vector size" << std::endl;
78 #pragma omp parallel private(regs)
81 get_isie(y.size(), is, ie);
82 reg alpha_hi = broadcast(alpha.x[0]);
83 reg alpha_lo = broadcast(alpha.x[1]);
84 for(i = is; i < (ie-SIMD_Length+1); i += SIMD_Length){
86 reg x_hi = load(x.
hi[i]);
87 reg x_lo = load(x.
lo[i]);
89 reg y_hi = load(y.data()[i]);
90 reg y_lo = regs.zeros;
92 Fma(y_hi, y_lo, x_hi, x_lo, alpha_hi, alpha_lo, y_hi, y_lo, regs);
94 store(y.data()[i], y_hi);
97 Fma(y.data()[i], x.
hi[i], x.
lo[i], alpha.x[0], alpha.x[1], y.data()[i], 0.0);
103 if(x.size() != y.size()){
104 std::cerr <<
"error bad vector size" << std::endl;
109 #pragma omp parallel private(regs)
112 get_isie(y.size(), is, ie);
113 reg alpha_hi = broadcast(alpha.x[0]);
114 reg alpha_lo = broadcast(alpha.x[1]);
115 for(i = is; i < (ie-SIMD_Length+1); i += SIMD_Length){
117 reg x_hi = load(x.data()[i]);
118 reg x_lo = regs.zeros;
120 reg y_hi = load(y.data()[i]);
121 reg y_lo = regs.zeros;
123 Fma(y_hi, y_lo, x_hi, x_lo, alpha_hi, alpha_lo, y_hi, y_lo, regs);
125 store(y.data()[i], y_hi);
128 Fma(y.data()[i], x.data()[i], 0.0, alpha.x[0], alpha.x[1], y.data()[i], 0.0);
136 std::cerr <<
"error bad vector size" << std::endl;
141 #pragma omp parallel private(regs)
144 get_isie(y.
size(), is, ie);
145 reg alpha_hi = broadcast(alpha);
146 reg alpha_lo = regs.zeros;
147 for(i = is; i < (ie-SIMD_Length+1); i += SIMD_Length){
149 reg x_hi = load(x.
hi[i]);
150 reg x_lo = load(x.
lo[i]);
152 reg y_hi = load(y.
hi[i]);
153 reg y_lo = load(y.
lo[i]);
155 Fma(y_hi, y_lo, x_hi, x_lo, alpha_hi, alpha_lo, y_hi, y_lo, regs);
157 store(y.
hi[i], y_hi);
158 store(y.
lo[i], y_lo);
161 Fma(y.
hi[i], y.
lo[i], x.
hi[i], x.
lo[i], alpha, 0.0, y.
hi[i], y.
lo[i]);
167 if(x.size() != y.
size()){
168 std::cerr <<
"error bad vector size" << std::endl;
173 #pragma omp parallel private(regs)
176 get_isie(y.
size(), is, ie);
177 reg alpha_hi = broadcast(alpha);
178 reg alpha_lo = regs.zeros;
179 for(i = is; i < (ie-SIMD_Length+1); i += SIMD_Length){
181 reg x_hi = load(x.data()[i]);
182 reg x_lo = regs.zeros;
184 reg y_hi = load(y.
hi[i]);
185 reg y_lo = load(y.
lo[i]);
187 Fma(y_hi, y_lo, x_hi, x_lo, alpha_hi, alpha_lo, y_hi, y_lo, regs);
189 store(y.
hi[i], y_hi);
190 store(y.
lo[i], y_lo);
193 Fma(y.
hi[i], y.
lo[i], x.data()[i], 0.0, alpha, 0.0, y.
hi[i], y.
lo[i]);
199 if(x.
size() != y.size()){
200 std::cerr <<
"error bad vector size" << std::endl;
205 #pragma omp parallel private(regs)
208 get_isie(y.size(), is, ie);
209 reg alpha_hi = broadcast(alpha);
210 reg alpha_lo = regs.zeros;
211 for(i = is; i < (ie-SIMD_Length+1); i += SIMD_Length){
213 reg x_hi = load(x.
hi[i]);
214 reg x_lo = load(x.
lo[i]);
216 reg y_hi = load(y.data()[i]);
217 reg y_lo = regs.zeros;
219 Fma(y_hi, y_lo, x_hi, x_lo, alpha_hi, alpha_lo, y_hi, y_lo, regs);
221 store(y.data()[i], y_hi);
224 Fma(y.data()[i], x.
hi[i], x.
lo[i], alpha, 0.0, y.data()[i], 0.0);
230 if(x.size() != y.size()){
231 std::cerr <<
"error bad vector size" << std::endl;
236 #pragma omp parallel private(regs)
239 get_isie(y.size(), is, ie);
240 reg alpha_hi = broadcast(alpha);
241 reg alpha_lo = regs.zeros;
242 for(i = is; i < (ie-SIMD_Length+1); i += SIMD_Length){
244 reg x_hi = load(x.data()[i]);
245 reg x_lo = regs.zeros;
247 reg y_hi = load(y.data()[i]);
248 reg y_lo = regs.zeros;
250 Fma(y_hi, y_lo, x_hi, x_lo, alpha_hi, alpha_lo, y_hi, y_lo, regs);
252 store(y.data()[i], y_hi);
255 Fma(y.data()[i], x.data()[i], 0.0, alpha, 0.0, y.data()[i], 0.0);
Double-precision vector class. This class is almost the same as std::vector<double>.
Double-double precision vector class.
void xpay(const dd_real &alpha, const dd_real_vector &x, dd_real_vector &y)
xpay: y = x + alpha * y