DD-AVX  2.0.0
axpy.cpp
Go to the documentation of this file.
1 #include<DD-AVX_internal.hpp>
2 using namespace ddavx_core;
3 
4 namespace dd_avx{
5 //alpha = DD ///////////////////////////////////////////
6  void axpy(const dd_real& alpha, const dd_real_vector& x, dd_real_vector& y){
7  if(x.size() != y.size()){
8  std::cerr << "error bad vector size" << std::endl;
9  assert(1);
10  }
11  registers regs;
12 
13 #pragma omp parallel private(regs)
14  {
15  int i=0, is=0, ie=0;
16  get_isie(y.size(), is, ie);
17  reg alpha_hi = broadcast(alpha.x[0]);
18  reg alpha_lo = broadcast(alpha.x[1]);
19  for(i = is; i < (ie-SIMD_Length+1); i += SIMD_Length){
20 
21  reg x_hi = load(x.hi[i]);
22  reg x_lo = load(x.lo[i]);
23 
24  reg y_hi = load(y.hi[i]);
25  reg y_lo = load(y.lo[i]);
26 
27  Fma(y_hi, y_lo, y_hi, y_lo, alpha_hi, alpha_lo, x_hi, x_lo, regs);
28 
29  store(y.hi[i], y_hi);
30  store(y.lo[i], y_lo);
31  }
32  for(;i<ie;i++){
33  Fma(y.hi[i], y.lo[i], y.hi[i], y.lo[i], alpha.x[0], alpha.x[1], x.hi[i], x.lo[i]);
34  }
35  }
36  }
37 
38  void axpy(const dd_real& alpha, const d_real_vector& x, dd_real_vector& y){
39  if(x.size() != y.size()){
40  std::cerr << "error bad vector size" << std::endl;
41  assert(1);
42  }
43  registers regs;
44 
45 #pragma omp parallel private(regs)
46  {
47  int i=0, is=0, ie=0;
48  get_isie(y.size(), is, ie);
49  reg alpha_hi = broadcast(alpha.x[0]);
50  reg alpha_lo = broadcast(alpha.x[1]);
51  for(i = is; i < (ie-SIMD_Length+1); i += SIMD_Length){
52 
53  reg x_hi = load(x.data()[i]);
54 
55  reg y_hi = load(y.hi[i]);
56  reg y_lo = load(y.lo[i]);
57 
58  Fmad(y_hi, y_lo, y_hi, y_lo, alpha_hi, alpha_lo, x_hi, regs);
59 
60  store(y.hi[i], y_hi);
61  store(y.lo[i], y_lo);
62  }
63  for(;i<ie;i++){
64  Fmad(y.hi[i], y.lo[i], y.hi[i], y.lo[i], alpha.x[0], alpha.x[1], x.data()[i]);
65  }
66  }
67  }
68 
69  void axpy(const dd_real& alpha, const dd_real_vector& x, d_real_vector& y){
70  if(x.size() != y.size()){
71  std::cerr << "error bad vector size" << std::endl;
72  assert(1);
73  }
74  registers regs;
75 
76 #pragma omp parallel private(regs)
77  {
78  int i=0, is=0, ie=0;
79  get_isie(y.size(), is, ie);
80  reg alpha_hi = broadcast(alpha.x[0]);
81  reg alpha_lo = broadcast(alpha.x[1]);
82  for(i = is; i < (ie-SIMD_Length+1); i += SIMD_Length){
83 
84  reg x_hi = load(x.hi[i]);
85  reg x_lo = load(x.lo[i]);
86 
87  reg y_hi = load(y.data()[i]);
88  reg y_lo = regs.zeros;
89 
90  Fma(y_hi, y_hi, y_lo, alpha_hi, alpha_lo, x_hi, x_lo, regs);
91 
92  store(y.data()[i], y_hi);
93  }
94  for(;i<ie;i++){
95  Fma(y.data()[i], y.data()[i], 0.0, alpha.x[0], alpha.x[1], x.hi[i], x.lo[i]);
96  }
97  }
98  }
99 
100  void axpy(const dd_real& alpha, const d_real_vector& x, d_real_vector& y){
101  if(x.size() != y.size()){
102  std::cerr << "error bad vector size" << std::endl;
103  assert(1);
104  }
105  registers regs;
106 
107 #pragma omp parallel private(regs)
108  {
109  int i=0, is=0, ie=0;
110  get_isie(y.size(), is, ie);
111  reg alpha_hi = broadcast(alpha.x[0]);
112  reg alpha_lo = broadcast(0.0);
113  for(i = is; i < (ie-SIMD_Length+1); i += SIMD_Length){
114 
115  reg x_hi = load(x.data()[i]);
116 
117  reg y_hi = load(y.data()[i]);
118  reg y_lo = regs.zeros;
119 
120  Fmad(y_hi, y_lo, y_hi, y_lo, alpha_hi, alpha_lo, x_hi, regs);
121 
122  store(y.data()[i], y_hi);
123  }
124  for(;i<ie;i++){
125  Fma(y.data()[i], y.data()[i], 0.0, alpha.x[0], alpha.x[1], x.data()[i], 0.0);
126  }
127  }
128  }
129 
130 //alpha = D ///////////////////////////////////////////
131  void axpy(const d_real& alpha, const dd_real_vector& x, dd_real_vector& y){
132  if(x.size() != y.size()){
133  std::cerr << "error bad vector size" << std::endl;
134  assert(1);
135  }
136  registers regs;
137 
138 #pragma omp parallel private(regs)
139  {
140  int i=0, is=0, ie=0;
141  get_isie(y.size(), is, ie);
142  reg alpha_hi = broadcast(alpha);
143  reg alpha_lo = broadcast(0.0);
144  for(i = is; i < (ie-SIMD_Length+1); i += SIMD_Length){
145 
146  reg x_hi = load(x.hi[i]);
147  reg x_lo = load(x.lo[i]);
148 
149  reg y_hi = load(y.hi[i]);
150  reg y_lo = load(y.lo[i]);
151 
152  Fmad(y_hi, y_lo, y_hi, y_lo, x_hi, x_lo, alpha_hi, regs);
153 
154  store(y.hi[i], y_hi);
155  store(y.lo[i], y_lo);
156  }
157  for(;i<ie;i++){
158  Fma(y.hi[i], y.lo[i], y.hi[i], y.lo[i], alpha, 0.0, x.hi[i], x.lo[i]);
159  }
160  }
161  }
162 
163  void axpy(const d_real& alpha, const d_real_vector& x, dd_real_vector& y){
164  if(x.size() != y.size()){
165  std::cerr << "error bad vector size" << std::endl;
166  assert(1);
167  }
168  registers regs;
169 
170 #pragma omp parallel private(regs)
171  {
172  int i=0, is=0, ie=0;
173  get_isie(y.size(), is, ie);
174  reg alpha_hi = broadcast(alpha);
175  reg alpha_lo = broadcast(0.0);
176  for(i = is; i < (ie-SIMD_Length+1); i += SIMD_Length){
177 
178  reg x_hi = load(x.data()[i]);
179  reg x_lo = regs.zeros;
180 
181  reg y_hi = load(y.hi[i]);
182  reg y_lo = load(y.lo[i]);
183 
184  Fmad(y_hi, y_lo, y_hi, y_lo, x_hi, x_lo, alpha_hi, regs);
185 
186  store(y.hi[i], y_hi);
187  store(y.lo[i], y_lo);
188  }
189  for(;i<ie;i++){
190  Fma(y.hi[i], y.lo[i], y.hi[i], y.lo[i], alpha, 0.0, x.data()[i], 0.0);
191  }
192  }
193  }
194 
195  void axpy(const d_real& alpha, const dd_real_vector& x, d_real_vector& y){
196  if(x.size() != y.size()){
197  std::cerr << "error bad vector size" << std::endl;
198  assert(1);
199  }
200  registers regs;
201 
202 #pragma omp parallel private(regs)
203  {
204  int i=0, is=0, ie=0;
205  get_isie(y.size(), is, ie);
206  reg alpha_hi = broadcast(alpha);
207  reg alpha_lo = broadcast(0.0);
208  for(i = is; i < (ie-SIMD_Length+1); i += SIMD_Length){
209 
210  reg x_hi = load(x.hi[i]);
211  reg x_lo = load(x.lo[i]);
212 
213  reg y_hi = load(y.data()[i]);
214  reg y_lo = regs.zeros;
215 
216  Fmad(y_hi, y_lo, y_hi, y_lo, x_hi, x_lo, alpha_hi, regs);
217 
218  store(y.data()[i], y_hi);
219  }
220  for(;i<ie;i++){
221  Fma(y.data()[i], y.data()[i], 0.0, alpha, 0.0, x.hi[i], x.lo[i]);
222  }
223  }
224  }
225 
226  void axpy(const d_real& alpha, const d_real_vector& x, d_real_vector& y){
227  if(x.size() != y.size()){
228  std::cout << "error vecvor size is" << x.size() << y.size() << std::endl;
229  assert(1);
230  }
231 
232 #pragma omp parallel for
233  for(int i = 0 ; i<y.size();i++){
234  y.data()[i] = y.data()[i] + alpha * x.data()[i];
235  }
236  }
237 }
double d_real
Definition: DD-AVX.hpp:22
Double precision vector class, This class is almost same as std::vector<double>
Double-double precision vector class.
std::vector< double > lo
int size() const
get size
std::vector< double > hi
void axpy(const dd_real &alpha, const dd_real_vector &x, dd_real_vector &y)
axpy: y = ax+y
Definition: axpy.cpp:6