DD-AVX  2.0.0
dot.cpp
Go to the documentation of this file.
#include<DD-AVX_internal.hpp>
#include <vector>
2 using namespace ddavx_core;
3 
4 namespace dd_avx{
5 
6  dd_real dot(const dd_real_vector& x, const dd_real_vector& y){
7  if(x.size() != y.size()){
8  std::cerr << "error bad vector size" << std::endl;
9  assert(1);
10  }
11  registers regs;
12 
13  dd_real dot[omp_get_max_threads()];
14 
15 #pragma omp parallel private(regs)
16  {
17  int thN = omp_get_thread_num();
18  int i=0, is=0, ie=0;
19  get_isie(y.size(), is, ie);
20 
21  reg r_hi = regs.zeros;
22  reg r_lo = regs.zeros;
23 
24  for(i = is; i < (ie-SIMD_Length+1); i += SIMD_Length){
25 
26  reg x_hi = load(x.hi[i]);
27  reg x_lo = load(x.lo[i]);
28 
29  reg y_hi = load(y.hi[i]);
30  reg y_lo = load(y.lo[i]);
31 
32  Fma(r_hi, r_lo, r_hi, r_lo, x_hi, x_lo, y_hi, y_lo, regs);
33  }
34 
35  dot[thN] = reduction(r_hi, r_lo);
36 
37  for(;i<ie;i++){
38  Fma(dot[thN].x[0], dot[thN].x[1], dot[thN].x[0], dot[thN].x[1], x.hi[i], x.lo[i], y.hi[i], y.lo[i]);
39  }
40  }
41 
42  dd_real tmp = 0;
43  for(int i=0; i < omp_get_max_threads(); i++){
44  tmp += dot[i];
45  }
46 
47  return tmp;
48  }
49 
50 
51  dd_real dot(const d_real_vector& x, const dd_real_vector& y){
52  if(x.size() != y.size()){
53  std::cerr << "error bad vector size" << std::endl;
54  assert(1);
55  }
56  registers regs;
57 
58  dd_real dot[omp_get_max_threads()];
59 
60 #pragma omp parallel private(regs)
61  {
62  int thN = omp_get_thread_num();
63  int i=0, is=0, ie=0;
64  get_isie(y.size(), is, ie);
65 
66  reg r_hi = regs.zeros;
67  reg r_lo = regs.zeros;
68 
69  for(i = is; i < (ie-SIMD_Length+1); i += SIMD_Length){
70 
71  reg x_hi = load(x.data()[i]);
72 
73  reg y_hi = load(y.hi[i]);
74  reg y_lo = load(y.lo[i]);
75 
76  Fmad(r_hi, r_lo, r_hi, r_lo, y_hi, y_lo, x_hi, regs);
77  }
78 
79  dot[thN] = reduction(r_hi, r_lo);
80 
81  for(;i<ie;i++){
82  Fmad(dot[thN].x[0], dot[thN].x[1], dot[thN].x[0], dot[thN].x[1], y.hi[i], y.lo[i], x.data()[i]);
83  }
84  }
85 
86  dd_real tmp = 0;
87  for(int i=0; i < omp_get_max_threads(); i++){
88  tmp += dot[i];
89  }
90 
91  return tmp;
92  }
93 
94  dd_real dot(const dd_real_vector& x, const d_real_vector& y){
95  if(x.size() != y.size()){
96  std::cerr << "error bad vector size" << std::endl;
97  assert(1);
98  }
99  registers regs;
100 
101  dd_real dot[omp_get_max_threads()];
102 
103 #pragma omp parallel private(regs)
104  {
105  int thN = omp_get_thread_num();
106  int i=0, is=0, ie=0;
107  get_isie(y.size(), is, ie);
108 
109  reg r_hi = regs.zeros;
110  reg r_lo = regs.zeros;
111 
112  for(i = is; i < (ie-SIMD_Length+1); i += SIMD_Length){
113 
114  reg x_hi = load(x.hi[i]);
115  reg x_lo = load(x.lo[i]);
116 
117  reg y_hi = load(y.data()[i]);
118 
119 
120  Fmad(r_hi, r_lo, r_hi, r_lo, x_hi, x_lo, y_hi, regs);
121  }
122 
123  dot[thN] = reduction(r_hi, r_lo);
124 
125  for(;i<ie;i++){
126  Fmad(dot[thN].x[0], dot[thN].x[1], dot[thN].x[0], dot[thN].x[1], x.hi[i], x.lo[i], y.data()[i]);
127  }
128  }
129 
130  dd_real tmp = 0;
131  for(int i=0; i < omp_get_max_threads(); i++){
132  tmp += dot[i];
133  }
134 
135  return tmp;
136  }
137 
138  dd_real dot(const d_real_vector& x, const d_real_vector& y){
139  if(x.size() != y.size()){
140  std::cerr << "error bad vector size" << std::endl;
141  assert(1);
142  }
143  registers regs;
144 
145  dd_real dot[omp_get_max_threads()];
146 
147 #pragma omp parallel private(regs)
148  {
149  int thN = omp_get_thread_num();
150  int i=0, is=0, ie=0;
151  get_isie(y.size(), is, ie);
152 
153  reg r_hi = regs.zeros;
154  reg r_lo = regs.zeros;
155 
156  for(i = is; i < (ie-SIMD_Length+1); i += SIMD_Length){
157 
158  reg x_hi = load(x.data()[i]);
159  reg x_lo = regs.zeros;
160 
161  reg y_hi = load(y.data()[i]);
162 
163 
164  Fmad(r_hi, r_lo, r_hi, r_lo, x_hi, x_lo, y_hi, regs);
165  }
166 
167  dot[thN] = reduction(r_hi, r_lo);
168 
169  for(;i<ie;i++){
170  Fmad(dot[thN].x[0], dot[thN].x[1], dot[thN].x[0], dot[thN].x[1], x.data()[i], 0.0, y.data()[i]);
171  }
172  }
173 
174  dd_real tmp = 0;
175  for(int i=0; i < omp_get_max_threads(); i++){
176  tmp += dot[i];
177  }
178 
179  return tmp;
180  }
181 }
Double precision vector class. This class is almost the same as std::vector<double>.
Double-double precision vector class.
std::vector< double > lo
int size() const
get size
std::vector< double > hi
dd_real dot(const dd_real_vector &x, const dd_real_vector &y)
dot: ans = (x,y)
Definition: dot.cpp:6