Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

1.3.2 #32

Merged
merged 5 commits into from
Dec 22, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions .github/workflows/build.yml
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ jobs:
fail-fast: false
matrix:
os: [ubuntu-latest, windows-latest]
nim-version: ['1.2.x', '1.4.x', '1.6.x', 'stable']
nim-version: ['1.2.x', '1.4.x', '1.6.x', '2.0.x', 'stable']
include:
- nim-version: '1.4.x'
gc_orc: true
Expand All @@ -17,7 +17,7 @@ jobs:

steps:
- uses: actions/checkout@v4
- uses: jiro4989/setup-nim-action@v1
- uses: jiro4989/setup-nim-action@v2
with:
nim-version: ${{ matrix.nim-version }}
repo-token: ${{ secrets.GITHUB_TOKEN }}
Expand Down
2 changes: 1 addition & 1 deletion nimsimd.nimble
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
version = "1.3.1"
version = "1.3.2"
author = "Ryan Oldenburg"
description = "Pleasant Nim bindings for SIMD instruction sets."
license = "MIT"
Expand Down
21 changes: 19 additions & 2 deletions src/nimsimd/neon.nim
Original file line number Diff line number Diff line change
Expand Up @@ -187,6 +187,10 @@ func vld1q_lane_u16*(p: pointer, v: uint16x8, lane: int32): uint16x8
# Lane loads into the `q` vector types: each takes a source pointer, an
# existing vector and a lane index, and returns a vector of the same type.
# NOTE(review): no importc pragma is visible on these rows; the binding to the
# C intrinsic of the same name is presumably supplied elsewhere in the file.
func vld1q_lane_u32*(p: pointer, v: uint32x4, lane: int32): uint32x4
func vld1q_lane_u64*(p: pointer, v: uint64x2, lane: int32): uint64x2

# Lane loads into the non-`q` vector types (`uint32x2`, `uint16x4`, `uint8x8`),
# with the same (pointer, vector, lane) -> vector shape as the `q` forms above.
# Per Arm's intrinsics reference the named lane is read from `p` and the other
# lanes come from `v`; the lane index presumably has to be a constant in the
# generated C - confirm.
func vld1_lane_u32*(p: pointer, v: uint32x2, lane: int32): uint32x2
func vld1_lane_u16*(p: pointer, v: uint16x4, lane: int32): uint16x4
func vld1_lane_u8*(p: pointer, v: uint8x8, lane: int32): uint8x8

# Takes only a pointer and returns a `float64x2`; per Arm's reference the `dup`
# form replicates the single loaded element into every lane.
func vld1q_dup_f64*(p: pointer): float64x2

func vst1q_s32*(p: pointer, v: int32x4)
Expand All @@ -201,9 +205,12 @@ func vst1_u8*(p: pointer, v: uint8x8)
# Whole-vector stores for the non-`q` vector types: each takes a destination
# pointer and a vector, and declares no return value.
func vst1_u16*(p: pointer, v: uint16x4)
func vst1_u32*(p: pointer, v: uint32x2)
func vst1_u64*(p: pointer, v: uint64x1)

func vst1_f32*(p: pointer, v: float32x2)

# Single-lane stores: (pointer, vector, lane index), no return value. Per Arm's
# intrinsics reference only the named lane of `v` is written to `p`; the lane
# index presumably has to be a constant in the generated C - confirm.
func vst1_lane_u32*(p: pointer, v: uint32x2, lane: int32)
func vst1_lane_u16*(p: pointer, v: uint16x4, lane: int32)
func vst1_lane_u8*(p: pointer, v: uint8x8, lane: int32)

# Stores taking the two-vector aggregate types (`uint8x8x2`, `uint16x4x2`,
# `uint32x2x2`). Per Arm's reference `vst2` interleaves the two vectors when
# writing - confirm against the intrinsics documentation.
func vst2_u8*(p: pointer, v: uint8x8x2)
func vst2_u16*(p: pointer, v: uint16x4x2)
func vst2_u32*(p: pointer, v: uint32x2x2)
Expand Down Expand Up @@ -396,6 +403,9 @@ func vextq_f64*(a, b: float64x2, n: int): float64x2

# Across-vector reductions: each takes one vector and returns a scalar of the
# vector's element type. Per Arm's intrinsics reference `vminv*` yields the
# minimum over all lanes.
func vminvq_u32*(a: uint32x4): uint32
func vminvq_f32*(a: float32x4): float32
func vminvq_u8*(a: uint8x16): uint8

# Same reduction shape for the 8-lane `uint8x8` type.
func vminv_u8*(a: uint8x8): uint8

# Two-operand form: takes two `uint8x16` vectors and returns a `uint8x16`
# (lane-wise maximum per Arm's reference), unlike the scalar reductions above.
func vmaxq_u8*(a, b: uint8x16): uint8x16

Expand Down Expand Up @@ -451,6 +461,10 @@ func vreinterpretq_s32_u8*(a: uint8x16): int32x4
# Reinterpret casts between `q` vector types. The name encodes destination
# then source element type (`vreinterpretq_<dst>_<src>`), matching the
# parameter and return types declared here.
func vreinterpretq_s32_u32*(a: uint32x4): int32x4
func vreinterpretq_u16_u8*(a: uint8x16): uint16x8

# Reinterpret casts between the non-`q` vector types, using the same
# `<dst>_<src>` naming. Per Arm's intrinsics reference these change only the
# element type the bits are viewed as, not the bits themselves.
func vreinterpret_u32_u8*(a: uint8x8): uint32x2
func vreinterpret_u8_u16*(a: uint16x4): uint8x8
func vreinterpret_u16_u8*(a: uint8x8): uint16x4
func vreinterpret_u8_u32*(a: uint32x2): uint8x8
func vreinterpret_u64_u8*(a: uint8x8): uint64x1
func vreinterpret_s32_u32*(a: uint32x2): int32x2
func vreinterpret_f32_u64*(a: uint64x1): float32x2
Expand All @@ -464,4 +478,7 @@ func uint32x4_immediate*(v3, v2, v1, v0: static uint32): uint32x4 {.inline.} =
{.emit: [result, " = (uint32x4_t){", v0, ",", v1, ",", v2, ",", v3, "};"].}

func uint8x16_immediate*(v15, v14, v13, v12, v11, v10, v9, v8, v7, v6, v5, v4, v3, v2, v1, v0: static uint8): uint8x16 {.inline.} =
  ## Builds a `uint8x16` from sixteen compile-time `uint8` constants.
  ##
  ## Parameters are listed from `v15` down to `v0`, while the emitted C
  ## initializer lists them from `v0` up to `v15`, so `v0` is the first
  ## element of the initializer.
  # The compound literal has to be `uint8x16_t`: it carries sixteen
  # initializers and is assigned to a `uint8x16` result, so the 8-lane
  # `uint8x8_t` spelling neither fits the initializer list nor the result type.
  {.emit: [
    result, " = (uint8x16_t){",
    v0, ",", v1, ",", v2, ",", v3, ",",
    v4, ",", v5, ",", v6, ",", v7, ",",
    v8, ",", v9, ",", v10, ",", v11, ",",
    v12, ",", v13, ",", v14, ",", v15,
    "};"
  ].}

func uint8x8_immediate*(v7, v6, v5, v4, v3, v2, v1, v0: static uint8): uint8x8 {.inline.} =
  ## Builds a `uint8x8` from eight compile-time `uint8` constants.
  ##
  ## Parameters run from `v7` down to `v0`; the emitted C initializer lists
  ## them from `v0` up to `v7`.
  {.emit: [
    result, " = (uint8x8_t){",
    v0, ",", v1, ",", v2, ",", v3, ",",
    v4, ",", v5, ",", v6, ",", v7,
    "};"
  ].}
6 changes: 3 additions & 3 deletions src/nimsimd/sse2.nim
Original file line number Diff line number Diff line change
Expand Up @@ -509,7 +509,7 @@ func mm_set_epi32*(a, b, c, d: int32 | uint32): M128i {.importc: "_mm_set_epi32"

func mm_set_epi64x*(a, b: int64 | uint64): M128i {.importc: "_mm_set_epi64x".}

func mm_set_epi8*(a, b, c, d, e, f, g, h, i, j, k, l, m, n, o, p: int8 | uint8): M128i {.importc: "_mm_set_epi8".}
func mm_set_epi8*(a, b, c, d, e, f, g, h, i, j, k, l, m, n, o, p: int8 | uint8 | char): M128i {.importc: "_mm_set_epi8".}

func mm_set_pd*(a, b: float64): M128d {.importc: "_mm_set_pd".}

Expand All @@ -523,15 +523,15 @@ func mm_set1_epi32*(a: int32 | uint32): M128i {.importc: "_mm_set1_epi32".}

func mm_set1_epi64x*(a: int64 | uint64): M128i {.importc: "_mm_set1_epi64x".}

func mm_set1_epi8*(a: int8 | uint8): M128i {.importc: "_mm_set1_epi8".}
func mm_set1_epi8*(a: int8 | uint8 | char): M128i {.importc: "_mm_set1_epi8".}

func mm_set1_pd*(a: float64): M128d {.importc: "_mm_set1_pd".}

func mm_setr_epi16*(a, b, c, d, e, f, g, h: int16): M128i {.importc: "_mm_setr_epi16".}

func mm_setr_epi32*(a, b, c, d: int32 | uint32): M128i {.importc: "_mm_setr_epi32".}

func mm_setr_epi8*(a, b, c, d, e, f, g, h, i, j, k, l, m, n, o, p: uint8): M128i {.importc: "_mm_setr_epi8".}
func mm_setr_epi8*(a, b, c, d, e, f, g, h, i, j, k, l, m, n, o, p: int8 | uint8 | char): M128i {.importc: "_mm_setr_epi8".}

func mm_setr_pd*(a, b: float64): M128d {.importc: "_mm_setr_pd".}

Expand Down
Loading