Benchmark patch: registers two new Earthfile targets, cpp-avx2 and julia-ux4,
in collect-data and adds their sources, src/leibniz_avx2.cpp and
src/leibniz_ux4.jl.

diff --git a/Earthfile b/Earthfile
index 1e2cbf1..bdf428a 100644
--- a/Earthfile
+++ b/Earthfile
@@ -32,6 +32,7 @@ collect-data:
     BUILD +clj
     BUILD +clj-bb
     BUILD +cpp
+    BUILD +cpp-avx2
     BUILD +crystal
     BUILD +cs
     BUILD +d
@@ -41,6 +42,7 @@ collect-data:
     BUILD +java
     BUILD +julia
     BUILD +julia-compiled
+    BUILD +julia-ux4
     BUILD +nodejs
     BUILD +lua
     BUILD +luajit
@@ -114,6 +116,14 @@ cpp:
     RUN --no-cache g++ leibniz.cpp -o leibniz -O3 -s -static -flto -march=native -mtune=native -fomit-frame-pointer -fno-signed-zeros -fno-trapping-math -fassociative-math
     DO +BENCH --name="cpp" --lang="C++ (g++)" --version="g++ --version" --cmd="./leibniz"
 
+cpp-avx2:
+    FROM +alpine
+    RUN apk add --no-cache gcc build-base
+
+    COPY ./src/leibniz_avx2.cpp ./
+    RUN --no-cache g++ leibniz_avx2.cpp -o leibniz_avx2 -O3 -s -static -flto -march=native -mtune=native -fomit-frame-pointer -fno-signed-zeros -fno-trapping-math -fassociative-math
+    DO +BENCH --name="cpp-avx2" --lang="C++ (avx2)" --version="g++ --version" --cmd="./leibniz_avx2"
+
 crystal:
     FROM crystallang/crystal:1.6-alpine
     RUN apk add --no-cache hyperfine
@@ -221,6 +231,16 @@ julia-compiled:
     RUN julia -e 'using Pkg; Pkg.add(["StaticCompiler", "StaticTools"]); using StaticCompiler, StaticTools; include("./leibniz_compiled.jl"); compile_executable(mainjl, (), "./")'
     DO +BENCH --name="julia-compiled" --lang="Julia (AOT compiled)" --version="julia --version" --cmd="./mainjl"
 
+julia-ux4:
+    # We have to use a special image since there is no Julia package on alpine 🤷‍♂️
+    FROM julia:1.8.2-alpine3.16
+    RUN apk add --no-cache hyperfine
+    COPY +build/scmeta ./
+
+    COPY ./src/rounds.txt ./
+    COPY ./src/leibniz_ux4.jl ./
+    DO +BENCH --name="julia-ux4" --lang="Julia (ux4)" --version="julia --version" --cmd="julia leibniz_ux4.jl"
+
 nodejs:
     FROM +alpine
     RUN apk add --no-cache nodejs-current
diff --git a/src/leibniz_avx2.cpp b/src/leibniz_avx2.cpp
new file mode 100644
index 0000000..42c73b0
--- /dev/null
+++ b/src/leibniz_avx2.cpp
@@ -0,0 +1,56 @@
+#include <cstdio>
+
+#include <immintrin.h>
+
+// Leibniz series: pi/4 = 1 - 1/3 + 1/5 - ...  Term i (i >= 2) is
+// (-1)^(i-1) / (2i - 1); the leading 1 is the initial value of `pi`.
+double _x = 1.0; // sign of the previous scalar term
+double pi = 1.0;
+
+int main()
+{
+    unsigned rounds = 0;
+    const unsigned unroll = 4; // doubles per 256-bit vector
+    auto infile = std::fopen("rounds.txt", "r"); // open file
+    if (!infile) { std::perror("rounds.txt"); return 1; }
+    const int parsed = std::fscanf(infile, "%u", &rounds); // read from file
+    std::fclose(infile); // close file
+    if (parsed != 1) { std::fprintf(stderr, "rounds.txt: no round count\n"); return 1; }
+
+    // _mm256_set_pd takes its arguments from the highest lane down, so the
+    // lane holding an even index carries -1 and the lane holding an odd one +1.
+    const __m256d x = _mm256_set_pd(-1.0, 1.0, -1.0, 1.0);
+    const __m256d inc = _mm256_set1_pd(4.0);
+    const __m256d two = _mm256_set1_pd(2.0);
+    const __m256d mone = _mm256_set1_pd(-1.0);
+    __m256d ivec = _mm256_set_pd(2.0, 3.0, 4.0, 5.0);
+    __m256d pivec = _mm256_setzero_pd();
+
+    rounds += 2u; // do this outside the loop
+    // Indices 2 .. rounds-1 are summed. Only whole groups of `unroll` indices
+    // go through the vector loop; the remainder is left to the scalar loop.
+    const unsigned vec_end = rounds - (rounds - 2u) % unroll;
+
+    for (unsigned i = 2u; i < vec_end; i += unroll)
+    {
+        // den = 2 * i - 1 for the four indices in ivec
+        const __m256d den = _mm256_add_pd(_mm256_mul_pd(two, ivec), mone);
+        ivec = _mm256_add_pd(ivec, inc); // ivec += 4
+        pivec = _mm256_add_pd(pivec, _mm256_div_pd(x, den)); // partial sums
+    }
+
+    // gather the partial sums
+    double lanes[4];
+    _mm256_storeu_pd(lanes, pivec);
+    pi += lanes[0] + lanes[1] + lanes[2] + lanes[3];
+
+    // wind-down loop: vec_end is even, so its first term is negative
+    for (unsigned i = vec_end; i < rounds; ++i)
+    {
+        _x = -_x;
+        pi += _x / (2.0 * i - 1.0); // double math: 2u * i wraps for large i
+    }
+
+    pi *= 4;
+    std::printf("%.16f\n", pi); // print 16 decimal digits of pi
+}
diff --git a/src/leibniz_ux4.jl b/src/leibniz_ux4.jl
new file mode 100644
index 0000000..ee3d39d
--- /dev/null
+++ b/src/leibniz_ux4.jl
@@ -0,0 +1,22 @@
+# Leibniz series for pi, unrolled four terms per loop iteration.
+# Adds the terms with index 2..rounds+2; the leading 1 is the start value of `pi`.
+function f(rounds)
+    pi = 1.0
+    x = -1.0 # sign of a term with an even index
+    r2 = rounds + 2
+    vend = 2 + 4 * div(r2 - 1, 4) # first index not covered by a full group of four
+    @simd for i in 2:4:(vend - 4)
+        pi += x / (2.0 * i - 1.0) -
+              x / (2.0 * i + 1.0) +
+              x / (2.0 * i + 3.0) -
+              x / (2.0 * i + 5.0)
+    end
+    for i in vend:r2 # leftover terms, at most three
+        pi += x / (2.0 * i - 1.0)
+        x = -x
+    end
+    return pi*4
+end
+
+rounds = parse(Int64, readchomp("rounds.txt"))
+print(f(rounds))