1 #include<DD-AVX_internal.hpp>
2 using namespace ddavx_core;
8 std::cerr <<
"error bad vector size" << std::endl;
13 #pragma omp parallel private(regs)
16 get_isie(y.
size(), is, ie);
17 reg alpha_hi = broadcast(alpha.x[0]);
18 reg alpha_lo = broadcast(alpha.x[1]);
19 for(i = is; i < (ie-SIMD_Length+1); i += SIMD_Length){
21 reg x_hi = load(x.
hi[i]);
22 reg x_lo = load(x.
lo[i]);
24 reg y_hi = load(y.
hi[i]);
25 reg y_lo = load(y.
lo[i]);
27 Fma(y_hi, y_lo, y_hi, y_lo, alpha_hi, alpha_lo, x_hi, x_lo, regs);
33 Fma(y.
hi[i], y.
lo[i], y.
hi[i], y.
lo[i], alpha.x[0], alpha.x[1], x.
hi[i], x.
lo[i]);
39 if(x.size() != y.
size()){
40 std::cerr <<
"error bad vector size" << std::endl;
45 #pragma omp parallel private(regs)
48 get_isie(y.
size(), is, ie);
49 reg alpha_hi = broadcast(alpha.x[0]);
50 reg alpha_lo = broadcast(alpha.x[1]);
51 for(i = is; i < (ie-SIMD_Length+1); i += SIMD_Length){
53 reg x_hi = load(x.data()[i]);
55 reg y_hi = load(y.
hi[i]);
56 reg y_lo = load(y.
lo[i]);
58 Fmad(y_hi, y_lo, y_hi, y_lo, alpha_hi, alpha_lo, x_hi, regs);
64 Fmad(y.
hi[i], y.
lo[i], y.
hi[i], y.
lo[i], alpha.x[0], alpha.x[1], x.data()[i]);
70 if(x.
size() != y.size()){
71 std::cerr <<
"error bad vector size" << std::endl;
76 #pragma omp parallel private(regs)
79 get_isie(y.size(), is, ie);
80 reg alpha_hi = broadcast(alpha.x[0]);
81 reg alpha_lo = broadcast(alpha.x[1]);
82 for(i = is; i < (ie-SIMD_Length+1); i += SIMD_Length){
84 reg x_hi = load(x.
hi[i]);
85 reg x_lo = load(x.
lo[i]);
87 reg y_hi = load(y.data()[i]);
88 reg y_lo = regs.zeros;
90 Fma(y_hi, y_hi, y_lo, alpha_hi, alpha_lo, x_hi, x_lo, regs);
92 store(y.data()[i], y_hi);
95 Fma(y.data()[i], y.data()[i], 0.0, alpha.x[0], alpha.x[1], x.
hi[i], x.
lo[i]);
101 if(x.size() != y.size()){
102 std::cerr <<
"error bad vector size" << std::endl;
107 #pragma omp parallel private(regs)
110 get_isie(y.size(), is, ie);
111 reg alpha_hi = broadcast(alpha.x[0]);
112 reg alpha_lo = broadcast(0.0);
113 for(i = is; i < (ie-SIMD_Length+1); i += SIMD_Length){
115 reg x_hi = load(x.data()[i]);
117 reg y_hi = load(y.data()[i]);
118 reg y_lo = regs.zeros;
120 Fmad(y_hi, y_lo, y_hi, y_lo, alpha_hi, alpha_lo, x_hi, regs);
122 store(y.data()[i], y_hi);
125 Fma(y.data()[i], y.data()[i], 0.0, alpha.x[0], alpha.x[1], x.data()[i], 0.0);
133 std::cerr <<
"error bad vector size" << std::endl;
138 #pragma omp parallel private(regs)
141 get_isie(y.
size(), is, ie);
142 reg alpha_hi = broadcast(alpha);
143 reg alpha_lo = broadcast(0.0);
144 for(i = is; i < (ie-SIMD_Length+1); i += SIMD_Length){
146 reg x_hi = load(x.
hi[i]);
147 reg x_lo = load(x.
lo[i]);
149 reg y_hi = load(y.
hi[i]);
150 reg y_lo = load(y.
lo[i]);
152 Fmad(y_hi, y_lo, y_hi, y_lo, x_hi, x_lo, alpha_hi, regs);
154 store(y.
hi[i], y_hi);
155 store(y.
lo[i], y_lo);
158 Fma(y.
hi[i], y.
lo[i], y.
hi[i], y.
lo[i], alpha, 0.0, x.
hi[i], x.
lo[i]);
164 if(x.size() != y.
size()){
165 std::cerr <<
"error bad vector size" << std::endl;
170 #pragma omp parallel private(regs)
173 get_isie(y.
size(), is, ie);
174 reg alpha_hi = broadcast(alpha);
175 reg alpha_lo = broadcast(0.0);
176 for(i = is; i < (ie-SIMD_Length+1); i += SIMD_Length){
178 reg x_hi = load(x.data()[i]);
179 reg x_lo = regs.zeros;
181 reg y_hi = load(y.
hi[i]);
182 reg y_lo = load(y.
lo[i]);
184 Fmad(y_hi, y_lo, y_hi, y_lo, x_hi, x_lo, alpha_hi, regs);
186 store(y.
hi[i], y_hi);
187 store(y.
lo[i], y_lo);
190 Fma(y.
hi[i], y.
lo[i], y.
hi[i], y.
lo[i], alpha, 0.0, x.data()[i], 0.0);
196 if(x.
size() != y.size()){
197 std::cerr <<
"error bad vector size" << std::endl;
202 #pragma omp parallel private(regs)
205 get_isie(y.size(), is, ie);
206 reg alpha_hi = broadcast(alpha);
207 reg alpha_lo = broadcast(0.0);
208 for(i = is; i < (ie-SIMD_Length+1); i += SIMD_Length){
210 reg x_hi = load(x.
hi[i]);
211 reg x_lo = load(x.
lo[i]);
213 reg y_hi = load(y.data()[i]);
214 reg y_lo = regs.zeros;
216 Fmad(y_hi, y_lo, y_hi, y_lo, x_hi, x_lo, alpha_hi, regs);
218 store(y.data()[i], y_hi);
221 Fma(y.data()[i], y.data()[i], 0.0, alpha, 0.0, x.
hi[i], x.
lo[i]);
227 if(x.size() != y.size()){
228 std::cout <<
"error vecvor size is" << x.size() << y.size() << std::endl;
232 #pragma omp parallel for
233 for(
int i = 0 ; i<y.size();i++){
234 y.data()[i] = y.data()[i] + alpha * x.data()[i];
Double precision vector class, This class is almost same as std::vector<double>
Double-double precision vector class.
void axpy(const dd_real &alpha, const dd_real_vector &x, dd_real_vector &y)
axpy: y = ax+y