From 072763435251812df97ba0bdfd1f9d9f35de7d6c Mon Sep 17 00:00:00 2001
From: Joe Landman
Date: Sat, 22 Oct 2022 20:41:51 -0400
Subject: [PATCH 1/7] AVX2 variation of c++

Use `-march=native -O` to enable.
---
 leibniz_avx2.cpp | 73 ++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 73 insertions(+)
 create mode 100644 leibniz_avx2.cpp

diff --git a/leibniz_avx2.cpp b/leibniz_avx2.cpp
new file mode 100644
index 0000000..42c73b0
--- /dev/null
+++ b/leibniz_avx2.cpp
@@ -0,0 +1,73 @@
+#include <cstdio>
+
+#include <immintrin.h>
+
+// Approximates pi with the Leibniz series 1 - 1/3 + 1/5 - 1/7 + ...,
+// evaluating four terms per iteration in one 256-bit register of doubles.
+// The number of terms is read from rounds.txt; the result goes to stdout.
+int main()
+{
+    constexpr unsigned unroll = 4u; // doubles per __m256d
+
+    unsigned rounds = 0u;
+    std::FILE* infile = std::fopen("rounds.txt", "r"); // open file
+    if (infile == nullptr)
+    {
+        std::perror("rounds.txt");
+        return 1;
+    }
+    const int parsed = std::fscanf(infile, "%u", &rounds); // read from file
+    std::fclose(infile); // close file
+    if (parsed != 1)
+    {
+        std::fprintf(stderr, "rounds.txt: expected an unsigned integer\n");
+        return 1;
+    }
+
+    // _mm256_set_pd takes lanes from highest to lowest: lane 3 holds i = 2
+    // (sign -1) and lane 0 holds i = 5 (sign +1).
+    const __m256d x = _mm256_set_pd(-1.0, 1.0, -1.0, 1.0);
+    const __m256d inc = _mm256_set1_pd(4.0);
+    const __m256d two = _mm256_set1_pd(2.0);
+    const __m256d mone = _mm256_set1_pd(-1.0);
+    __m256d ivec = _mm256_set_pd(2.0, 3.0, 4.0, 5.0);
+    __m256d pivec = _mm256_setzero_pd();
+
+    rounds += 2u; // do this outside the loop; terms are i = 2 .. rounds - 1
+
+    // Every vector iteration consumes the four terms i .. i + 3, so it may
+    // only run while a whole group still fits below `rounds`. Rounding
+    // `rounds` itself down to a multiple of four would let the last group
+    // run past vec_end and overlap the wind-down loop.
+    const unsigned vec_end = 2u + ((rounds - 2u) / unroll) * unroll;
+
+    for (unsigned i = 2u; i < vec_end; i += unroll)
+    {
+        // compute den = (2 * i - 1)
+        const __m256d den = _mm256_add_pd(_mm256_mul_pd(two, ivec), mone);
+
+        // increment ivec, so ivec += inc
+        ivec = _mm256_add_pd(ivec, inc);
+
+        // compute partial sums
+        pivec = _mm256_add_pd(pivec, _mm256_div_pd(x, den));
+    }
+
+    // gather the partial sums; storing to an array avoids pointer punning
+    double lanes[unroll];
+    _mm256_storeu_pd(lanes, pivec);
+    double pi = 1.0;
+    pi += lanes[0] + lanes[1] + lanes[2] + lanes[3];
+
+    // now the wind-down loop: at most unroll - 1 leftover terms. vec_end is
+    // even, so the first leftover term is negative.
+    double sign = 1.0;
+    for (unsigned i = vec_end; i < rounds; ++i)
+    {
+        sign = -sign;
+        pi += sign / (2.0 * i - 1.0);
+    }
+
+    pi *= 4;
+    std::printf("%.16f\n", pi); // print 16 decimal digits of pi
+}

From 
95e91ddec5b7da3a9645256e432678998a5f38a5 Mon Sep 17 00:00:00 2001
From: Joe Landman
Date: Sat, 22 Oct 2022 21:09:30 -0400
Subject: [PATCH 2/7] Simplified/unrolled julia

Unrolled 4 times. Similar to the AVX2 C++ code concept.
---
 src/leibniz_ux4.jl | 25 +++++++++++++++++++++++++
 1 file changed, 25 insertions(+)
 create mode 100644 src/leibniz_ux4.jl

diff --git a/src/leibniz_ux4.jl b/src/leibniz_ux4.jl
new file mode 100644
index 0000000..ee3d39d
--- /dev/null
+++ b/src/leibniz_ux4.jl
@@ -0,0 +1,25 @@
+# Approximate pi with the Leibniz series: starting from the leading 1.0, add
+# the terms (-1)^(i+1) / (2i - 1) for i = 2 .. rounds + 2, four per iteration.
+function f(rounds)
+    pi = 1.0
+    x = -1.0             # sign of the term at an even index
+    r2 = rounds + 2      # last term index (inclusive) -- TODO confirm intent
+    # First index that is not part of a complete group of four; there are
+    # r2 - 1 terms in total (i = 2 .. r2). vend is always even.
+    vend = 2 + 4 * div(r2 - 1, 4)
+    @simd for i in 2:4:(vend - 1)
+        pi += x / (2.0 * i - 1.0) -
+              x / (2.0 * i + 1.0) +
+              x / (2.0 * i + 3.0) -
+              x / (2.0 * i + 5.0)
+    end
+    # Leftover terms (at most three), alternating sign starting at even vend.
+    for i in vend:r2
+        pi += x / (2.0 * i - 1.0)
+        x = -x
+    end
+    return pi*4
+end
+
+rounds = parse(Int64, readchomp("rounds.txt"))
+print(f(rounds))

From 3d4e8ac370e96a1c2c4d937cb27b89624255041a Mon Sep 17 00:00:00 2001 From: Joe Landman Date: Sun, 23 Oct 2022 19:59:10 -0400 Subject: [PATCH 3/7] Update Earthfile --- Earthfile | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/Earthfile b/Earthfile index 1e2cbf1..18c1b1a 100644 --- a/Earthfile +++ b/Earthfile @@ -32,6 +32,7 @@ collect-data: BUILD +clj BUILD +clj-bb BUILD +cpp + BUILD +cpp-avx2 BUILD +crystal BUILD +cs BUILD +d @@ -41,6 +42,7 @@ collect-data: BUILD +java BUILD +julia BUILD +julia-compiled + BUILD +julia_ux4 BUILD +nodejs BUILD +lua BUILD +luajit @@ -114,6 +116,14 @@ cpp: RUN --no-cache g++ leibniz.cpp -o leibniz -O3 -s -static -flto -march=native -mtune=native -fomit-frame-pointer -fno-signed-zeros -fno-trapping-math -fassociative-math DO +BENCH --name="cpp" --lang="C++ (g++)" --version="g++ --version" --cmd="./leibniz" +cpp-avx2: + FROM +alpine + RUN apk add --no-cache gcc build-base + + COPY ./src/leibniz_avx2.cpp ./ + RUN --no-cache g++ leibniz.cpp -o leibniz_avx2 -O3 -s -static -flto -march=native -mtune=native -fomit-frame-pointer -fno-signed-zeros -fno-trapping-math 
-fassociative-math + DO +BENCH --name="cpp-avx2" --lang="C++ (g++)" --version="g++ --version" --cmd="./leibniz_avx2" + crystal: FROM crystallang/crystal:1.6-alpine RUN apk add --no-cache hyperfine @@ -221,6 +231,17 @@ julia-compiled: RUN julia -e 'using Pkg; Pkg.add(["StaticCompiler", "StaticTools"]); using StaticCompiler, StaticTools; include("./leibniz_compiled.jl"); compile_executable(mainjl, (), "./")' DO +BENCH --name="julia-compiled" --lang="Julia (AOT compiled)" --version="julia --version" --cmd="./mainjl" +julia-ux4: + # We have to use a special image since there is no Julia package on alpine 🤷‍♂️ + FROM julia:1.8.2-alpine3.16 + RUN apk add --no-cache hyperfine + COPY +build/scmeta ./ + + COPY ./src/rounds.txt ./ + COPY ./src/leibniz_ux4.jl ./ + DO +BENCH --name="julia" --lang="Julia" --version="julia --version" --cmd="julia leibniz_ux4.jl" + + nodejs: FROM +alpine RUN apk add --no-cache nodejs-current From 293c52f1499b65944789bcc7b5e9bc05dc4bf0b7 Mon Sep 17 00:00:00 2001 From: Niklas Heer Date: Mon, 24 Oct 2022 09:03:11 +0200 Subject: [PATCH 4/7] =?UTF-8?q?=F0=9F=93=9D=20Add=20distinctive=20names?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- Earthfile | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/Earthfile b/Earthfile index 18c1b1a..d6df506 100644 --- a/Earthfile +++ b/Earthfile @@ -122,7 +122,7 @@ cpp-avx2: COPY ./src/leibniz_avx2.cpp ./ RUN --no-cache g++ leibniz.cpp -o leibniz_avx2 -O3 -s -static -flto -march=native -mtune=native -fomit-frame-pointer -fno-signed-zeros -fno-trapping-math -fassociative-math - DO +BENCH --name="cpp-avx2" --lang="C++ (g++)" --version="g++ --version" --cmd="./leibniz_avx2" + DO +BENCH --name="cpp-avx2" --lang="C++ (avx2)" --version="g++ --version" --cmd="./leibniz_avx2" crystal: FROM crystallang/crystal:1.6-alpine @@ -239,8 +239,7 @@ julia-ux4: COPY ./src/rounds.txt ./ COPY ./src/leibniz_ux4.jl ./ - DO +BENCH --name="julia" --lang="Julia" 
--version="julia --version" --cmd="julia leibniz_ux4.jl" - + DO +BENCH --name="julia" --lang="Julia (ux4)" --version="julia --version" --cmd="julia leibniz_ux4.jl" nodejs: FROM +alpine From 18b31663cc1a181ab75d4fee05d884de6500333c Mon Sep 17 00:00:00 2001 From: Niklas Heer Date: Mon, 24 Oct 2022 09:47:12 +0200 Subject: [PATCH 5/7] =?UTF-8?q?=E2=9C=8F=EF=B8=8F=20Fix=20typo?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- Earthfile | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Earthfile b/Earthfile index d6df506..4a854e2 100644 --- a/Earthfile +++ b/Earthfile @@ -42,7 +42,7 @@ collect-data: BUILD +java BUILD +julia BUILD +julia-compiled - BUILD +julia_ux4 + BUILD +julia-ux4 BUILD +nodejs BUILD +lua BUILD +luajit @@ -239,7 +239,7 @@ julia-ux4: COPY ./src/rounds.txt ./ COPY ./src/leibniz_ux4.jl ./ - DO +BENCH --name="julia" --lang="Julia (ux4)" --version="julia --version" --cmd="julia leibniz_ux4.jl" + DO +BENCH --name="julia-ux4" --lang="Julia (ux4)" --version="julia --version" --cmd="julia leibniz_ux4.jl" nodejs: FROM +alpine From a2100679393b961ff316f54211949f2efdfa6770 Mon Sep 17 00:00:00 2001 From: Niklas Heer Date: Mon, 24 Oct 2022 11:10:19 +0200 Subject: [PATCH 6/7] =?UTF-8?q?=E2=9C=8F=EF=B8=8F=20Move=20into=20the=20ri?= =?UTF-8?q?ght=20directory?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- leibniz_avx2.cpp => src/leibniz_avx2.cpp | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename leibniz_avx2.cpp => src/leibniz_avx2.cpp (100%) diff --git a/leibniz_avx2.cpp b/src/leibniz_avx2.cpp similarity index 100% rename from leibniz_avx2.cpp rename to src/leibniz_avx2.cpp From 3f462ec1015edddc9c83a5c1c06eafcbeacffda7 Mon Sep 17 00:00:00 2001 From: Niklas Heer Date: Mon, 24 Oct 2022 11:19:33 +0200 Subject: [PATCH 7/7] =?UTF-8?q?=F0=9F=90=9B=20leibniz=5Favx2.cpp:=20Fix=20?= =?UTF-8?q?build=20to=20use=20the=20file?= MIME-Version: 1.0 Content-Type: 
text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- Earthfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Earthfile b/Earthfile index 4a854e2..bdf428a 100644 --- a/Earthfile +++ b/Earthfile @@ -121,7 +121,7 @@ cpp-avx2: RUN apk add --no-cache gcc build-base COPY ./src/leibniz_avx2.cpp ./ - RUN --no-cache g++ leibniz.cpp -o leibniz_avx2 -O3 -s -static -flto -march=native -mtune=native -fomit-frame-pointer -fno-signed-zeros -fno-trapping-math -fassociative-math + RUN --no-cache g++ leibniz_avx2.cpp -o leibniz_avx2 -O3 -s -static -flto -march=native -mtune=native -fomit-frame-pointer -fno-signed-zeros -fno-trapping-math -fassociative-math DO +BENCH --name="cpp-avx2" --lang="C++ (avx2)" --version="g++ --version" --cmd="./leibniz_avx2" crystal: