1 #include<DD-AVX_internal.hpp>
2 using namespace ddavx_core;
8 std::cerr <<
"error bad vector size" << std::endl;
13 dd_real
dot[omp_get_max_threads()];
15 #pragma omp parallel private(regs)
17 int thN = omp_get_thread_num();
19 get_isie(y.
size(), is, ie);
21 reg r_hi = regs.zeros;
22 reg r_lo = regs.zeros;
24 for(i = is; i < (ie-SIMD_Length+1); i += SIMD_Length){
26 reg x_hi = load(x.
hi[i]);
27 reg x_lo = load(x.
lo[i]);
29 reg y_hi = load(y.
hi[i]);
30 reg y_lo = load(y.
lo[i]);
32 Fma(r_hi, r_lo, r_hi, r_lo, x_hi, x_lo, y_hi, y_lo, regs);
35 dot[thN] = reduction(r_hi, r_lo);
38 Fma(
dot[thN].x[0],
dot[thN].x[1],
dot[thN].x[0],
dot[thN].x[1], x.hi[i], x.lo[i], y.
hi[i], y.
lo[i]);
43 for(
int i=0; i < omp_get_max_threads(); i++){
52 if(x.size() != y.
size()){
53 std::cerr <<
"error bad vector size" << std::endl;
58 dd_real
dot[omp_get_max_threads()];
60 #pragma omp parallel private(regs)
62 int thN = omp_get_thread_num();
64 get_isie(y.
size(), is, ie);
66 reg r_hi = regs.zeros;
67 reg r_lo = regs.zeros;
69 for(i = is; i < (ie-SIMD_Length+1); i += SIMD_Length){
71 reg x_hi = load(x.data()[i]);
73 reg y_hi = load(y.
hi[i]);
74 reg y_lo = load(y.
lo[i]);
76 Fmad(r_hi, r_lo, r_hi, r_lo, y_hi, y_lo, x_hi, regs);
79 dot[thN] = reduction(r_hi, r_lo);
82 Fmad(
dot[thN].x[0],
dot[thN].x[1],
dot[thN].x[0],
dot[thN].x[1], y.
hi[i], y.
lo[i], x.data()[i]);
87 for(
int i=0; i < omp_get_max_threads(); i++){
95 if(x.
size() != y.size()){
96 std::cerr <<
"error bad vector size" << std::endl;
101 dd_real
dot[omp_get_max_threads()];
103 #pragma omp parallel private(regs)
105 int thN = omp_get_thread_num();
107 get_isie(y.size(), is, ie);
109 reg r_hi = regs.zeros;
110 reg r_lo = regs.zeros;
112 for(i = is; i < (ie-SIMD_Length+1); i += SIMD_Length){
114 reg x_hi = load(x.
hi[i]);
115 reg x_lo = load(x.
lo[i]);
117 reg y_hi = load(y.data()[i]);
120 Fmad(r_hi, r_lo, r_hi, r_lo, x_hi, x_lo, y_hi, regs);
123 dot[thN] = reduction(r_hi, r_lo);
126 Fmad(
dot[thN].x[0],
dot[thN].x[1],
dot[thN].x[0],
dot[thN].x[1], x.hi[i], x.lo[i], y.data()[i]);
131 for(
int i=0; i < omp_get_max_threads(); i++){
139 if(x.size() != y.size()){
140 std::cerr <<
"error bad vector size" << std::endl;
145 dd_real
dot[omp_get_max_threads()];
147 #pragma omp parallel private(regs)
149 int thN = omp_get_thread_num();
151 get_isie(y.size(), is, ie);
153 reg r_hi = regs.zeros;
154 reg r_lo = regs.zeros;
156 for(i = is; i < (ie-SIMD_Length+1); i += SIMD_Length){
158 reg x_hi = load(x.data()[i]);
159 reg x_lo = regs.zeros;
161 reg y_hi = load(y.data()[i]);
164 Fmad(r_hi, r_lo, r_hi, r_lo, x_hi, x_lo, y_hi, regs);
167 dot[thN] = reduction(r_hi, r_lo);
170 Fmad(
dot[thN].x[0],
dot[thN].x[1],
dot[thN].x[0],
dot[thN].x[1], x.data()[i], 0.0, y.data()[i]);
175 for(
int i=0; i < omp_get_max_threads(); i++){
Double precision vector class, This class is almost same as std::vector<double>
Double-double precision vector class.
dd_real dot(const dd_real_vector &x, const dd_real_vector &y)
dot: ans = (x,y)