DD-AVX  2.0.0
xpay.cpp
Go to the documentation of this file.
1 #include<DD-AVX_internal.hpp>
2 using namespace ddavx_core;
3 //y = x + ay
4 
5 namespace dd_avx{
6 //alpha = DD ///////////////////////////////////////////
7  void xpay(const dd_real& alpha, const dd_real_vector& x, dd_real_vector& y){
8  if(x.size() != y.size()){
9  std::cerr << "error bad vector size" << std::endl;
10  assert(1);
11  }
12  registers regs;
13 
14 #pragma omp parallel private(regs)
15  {
16  int i=0, is=0, ie=0;
17  get_isie(y.size(), is, ie);
18  reg alpha_hi = broadcast(alpha.x[0]);
19  reg alpha_lo = broadcast(alpha.x[1]);
20  for(i = is; i < (ie-SIMD_Length+1); i += SIMD_Length){
21 
22  reg x_hi = load(x.hi[i]);
23  reg x_lo = load(x.lo[i]);
24 
25  reg y_hi = load(y.hi[i]);
26  reg y_lo = load(y.lo[i]);
27 
28  Fma(y_hi, y_lo, x_hi, x_lo, alpha_hi, alpha_lo, y_hi, y_lo, regs);
29 
30  store(y.hi[i], y_hi);
31  store(y.lo[i], y_lo);
32  }
33  for(;i<ie;i++){
34  Fma(y.hi[i], y.lo[i], x.hi[i], x.lo[i], alpha.x[0], alpha.x[1], y.hi[i], y.lo[i]);
35  }
36  }
37  }
38 
39  void xpay(const dd_real& alpha, const d_real_vector& x, dd_real_vector& y){
40  if(x.size() != y.size()){
41  std::cerr << "error bad vector size" << std::endl;
42  assert(1);
43  }
44  registers regs;
45 
46 #pragma omp parallel private(regs)
47  {
48  int i=0, is=0, ie=0;
49  get_isie(y.size(), is, ie);
50  reg alpha_hi = broadcast(alpha.x[0]);
51  reg alpha_lo = broadcast(alpha.x[1]);
52  for(i = is; i < (ie-SIMD_Length+1); i += SIMD_Length){
53 
54  reg x_hi = load(x.data()[i]);
55  reg x_lo = regs.zeros;
56 
57  reg y_hi = load(y.hi[i]);
58  reg y_lo = load(y.lo[i]);
59 
60  Fma(y_hi, y_lo, x_hi, x_lo, alpha_hi, alpha_lo, y_hi, y_lo, regs);
61 
62  store(y.hi[i], y_hi);
63  store(y.lo[i], y_lo);
64  }
65  for(;i<ie;i++){
66  Fma(y.hi[i], y.lo[i], x.data()[i], 0.0, alpha.x[0], alpha.x[1], y.hi[i], y.lo[i]);
67  }
68  }
69  }
70 
71  void xpay(const dd_real& alpha, const dd_real_vector& x, d_real_vector& y){
72  if(x.size() != y.size()){
73  std::cerr << "error bad vector size" << std::endl;
74  assert(1);
75  }
76  registers regs;
77 
78 #pragma omp parallel private(regs)
79  {
80  int i=0, is=0, ie=0;
81  get_isie(y.size(), is, ie);
82  reg alpha_hi = broadcast(alpha.x[0]);
83  reg alpha_lo = broadcast(alpha.x[1]);
84  for(i = is; i < (ie-SIMD_Length+1); i += SIMD_Length){
85 
86  reg x_hi = load(x.hi[i]);
87  reg x_lo = load(x.lo[i]);
88 
89  reg y_hi = load(y.data()[i]);
90  reg y_lo = regs.zeros;
91 
92  Fma(y_hi, y_lo, x_hi, x_lo, alpha_hi, alpha_lo, y_hi, y_lo, regs);
93 
94  store(y.data()[i], y_hi);
95  }
96  for(;i<ie;i++){
97  Fma(y.data()[i], x.hi[i], x.lo[i], alpha.x[0], alpha.x[1], y.data()[i], 0.0);
98  }
99  }
100  }
101 
102  void xpay(const dd_real& alpha, const d_real_vector& x, d_real_vector& y){
103  if(x.size() != y.size()){
104  std::cerr << "error bad vector size" << std::endl;
105  assert(1);
106  }
107  registers regs;
108 
109 #pragma omp parallel private(regs)
110  {
111  int i=0, is=0, ie=0;
112  get_isie(y.size(), is, ie);
113  reg alpha_hi = broadcast(alpha.x[0]);
114  reg alpha_lo = broadcast(alpha.x[1]);
115  for(i = is; i < (ie-SIMD_Length+1); i += SIMD_Length){
116 
117  reg x_hi = load(x.data()[i]);
118  reg x_lo = regs.zeros;
119 
120  reg y_hi = load(y.data()[i]);
121  reg y_lo = regs.zeros;
122 
123  Fma(y_hi, y_lo, x_hi, x_lo, alpha_hi, alpha_lo, y_hi, y_lo, regs);
124 
125  store(y.data()[i], y_hi);
126  }
127  for(;i<ie;i++){
128  Fma(y.data()[i], x.data()[i], 0.0, alpha.x[0], alpha.x[1], y.data()[i], 0.0);
129  }
130  }
131  }
132 
133 //alpha = D ///////////////////////////////////////////
134  void xpay(const d_real& alpha, const dd_real_vector& x, dd_real_vector& y){
135  if(x.size() != y.size()){
136  std::cerr << "error bad vector size" << std::endl;
137  assert(1);
138  }
139  registers regs;
140 
141 #pragma omp parallel private(regs)
142  {
143  int i=0, is=0, ie=0;
144  get_isie(y.size(), is, ie);
145  reg alpha_hi = broadcast(alpha);
146  reg alpha_lo = regs.zeros;
147  for(i = is; i < (ie-SIMD_Length+1); i += SIMD_Length){
148 
149  reg x_hi = load(x.hi[i]);
150  reg x_lo = load(x.lo[i]);
151 
152  reg y_hi = load(y.hi[i]);
153  reg y_lo = load(y.lo[i]);
154 
155  Fma(y_hi, y_lo, x_hi, x_lo, alpha_hi, alpha_lo, y_hi, y_lo, regs);
156 
157  store(y.hi[i], y_hi);
158  store(y.lo[i], y_lo);
159  }
160  for(;i<ie;i++){
161  Fma(y.hi[i], y.lo[i], x.hi[i], x.lo[i], alpha, 0.0, y.hi[i], y.lo[i]);
162  }
163  }
164  }
165 
166  void xpay(const d_real& alpha, const d_real_vector& x, dd_real_vector& y){
167  if(x.size() != y.size()){
168  std::cerr << "error bad vector size" << std::endl;
169  assert(1);
170  }
171  registers regs;
172 
173 #pragma omp parallel private(regs)
174  {
175  int i=0, is=0, ie=0;
176  get_isie(y.size(), is, ie);
177  reg alpha_hi = broadcast(alpha);
178  reg alpha_lo = regs.zeros;
179  for(i = is; i < (ie-SIMD_Length+1); i += SIMD_Length){
180 
181  reg x_hi = load(x.data()[i]);
182  reg x_lo = regs.zeros;
183 
184  reg y_hi = load(y.hi[i]);
185  reg y_lo = load(y.lo[i]);
186 
187  Fma(y_hi, y_lo, x_hi, x_lo, alpha_hi, alpha_lo, y_hi, y_lo, regs);
188 
189  store(y.hi[i], y_hi);
190  store(y.lo[i], y_lo);
191  }
192  for(;i<ie;i++){
193  Fma(y.hi[i], y.lo[i], x.data()[i], 0.0, alpha, 0.0, y.hi[i], y.lo[i]);
194  }
195  }
196  }
197 
198  void xpay(const d_real& alpha, const dd_real_vector& x, d_real_vector& y){
199  if(x.size() != y.size()){
200  std::cerr << "error bad vector size" << std::endl;
201  assert(1);
202  }
203  registers regs;
204 
205 #pragma omp parallel private(regs)
206  {
207  int i=0, is=0, ie=0;
208  get_isie(y.size(), is, ie);
209  reg alpha_hi = broadcast(alpha);
210  reg alpha_lo = regs.zeros;
211  for(i = is; i < (ie-SIMD_Length+1); i += SIMD_Length){
212 
213  reg x_hi = load(x.hi[i]);
214  reg x_lo = load(x.lo[i]);
215 
216  reg y_hi = load(y.data()[i]);
217  reg y_lo = regs.zeros;
218 
219  Fma(y_hi, y_lo, x_hi, x_lo, alpha_hi, alpha_lo, y_hi, y_lo, regs);
220 
221  store(y.data()[i], y_hi);
222  }
223  for(;i<ie;i++){
224  Fma(y.data()[i], x.hi[i], x.lo[i], alpha, 0.0, y.data()[i], 0.0);
225  }
226  }
227  }
228 
229  void xpay(const d_real& alpha, const d_real_vector& x, d_real_vector& y){
230  if(x.size() != y.size()){
231  std::cerr << "error bad vector size" << std::endl;
232  assert(1);
233  }
234  registers regs;
235 
236 #pragma omp parallel private(regs)
237  {
238  int i=0, is=0, ie=0;
239  get_isie(y.size(), is, ie);
240  reg alpha_hi = broadcast(alpha);
241  reg alpha_lo = regs.zeros;
242  for(i = is; i < (ie-SIMD_Length+1); i += SIMD_Length){
243 
244  reg x_hi = load(x.data()[i]);
245  reg x_lo = regs.zeros;
246 
247  reg y_hi = load(y.data()[i]);
248  reg y_lo = regs.zeros;
249 
250  Fma(y_hi, y_lo, x_hi, x_lo, alpha_hi, alpha_lo, y_hi, y_lo, regs);
251 
252  store(y.data()[i], y_hi);
253  }
254  for(;i<ie;i++){
255  Fma(y.data()[i], x.data()[i], 0.0, alpha, 0.0, y.data()[i], 0.0);
256  }
257  }
258  }
259 }
double d_real
Definition: DD-AVX.hpp:22
Double precision vector class, This class is almost same as std::vector<double>
Double-double precision vector class.
std::vector< double > lo
int size() const
get size
std::vector< double > hi
void xpay(const dd_real &alpha, const dd_real_vector &x, dd_real_vector &y)
xpay: y = x+ay
Definition: xpay.cpp:7