diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 925c133..21348a2 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -6,7 +6,7 @@ jobs: fail-fast: false matrix: os: [ubuntu-latest, windows-latest] - nim-version: ['1.2.x', '1.4.x', '1.6.x', 'stable'] + nim-version: ['1.2.x', '1.4.x', '1.6.x', '2.0.x', 'stable'] include: - nim-version: '1.4.x' gc_orc: true @@ -17,7 +17,7 @@ jobs: steps: - uses: actions/checkout@v4 - - uses: jiro4989/setup-nim-action@v1 + - uses: jiro4989/setup-nim-action@v2 with: nim-version: ${{ matrix.nim-version }} repo-token: ${{ secrets.GITHUB_TOKEN }} diff --git a/nimsimd.nimble b/nimsimd.nimble index 04093b0..9c01e01 100644 --- a/nimsimd.nimble +++ b/nimsimd.nimble @@ -1,4 +1,4 @@ -version = "1.3.1" +version = "1.3.2" author = "Ryan Oldenburg" description = "Pleasant Nim bindings for SIMD instruction sets." license = "MIT" diff --git a/src/nimsimd/neon.nim b/src/nimsimd/neon.nim index 08a3fc7..091d39b 100644 --- a/src/nimsimd/neon.nim +++ b/src/nimsimd/neon.nim @@ -187,6 +187,10 @@ func vld1q_lane_u16*(p: pointer, v: uint16x8, lane: int32): uint16x8 func vld1q_lane_u32*(p: pointer, v: uint32x4, lane: int32): uint32x4 func vld1q_lane_u64*(p: pointer, v: uint64x2, lane: int32): uint64x2 +func vld1_lane_u32*(p: pointer, v: uint32x2, lane: int32): uint32x2 +func vld1_lane_u16*(p: pointer, v: uint16x4, lane: int32): uint16x4 +func vld1_lane_u8*(p: pointer, v: uint8x8, lane: int32): uint8x8 + func vld1q_dup_f64*(p: pointer): float64x2 func vst1q_s32*(p: pointer, v: int32x4) @@ -201,9 +205,12 @@ func vst1_u8*(p: pointer, v: uint8x8) func vst1_u16*(p: pointer, v: uint16x4) func vst1_u32*(p: pointer, v: uint32x2) func vst1_u64*(p: pointer, v: uint64x1) - func vst1_f32*(p: pointer, v: float32x2) +func vst1_lane_u32*(p: pointer, v: uint32x2, lane: int32) +func vst1_lane_u16*(p: pointer, v: uint16x4, lane: int32) +func vst1_lane_u8*(p: pointer, v: uint8x8, lane: int32) + func vst2_u8*(p: pointer, v: 
uint8x8x2) func vst2_u16*(p: pointer, v: uint16x4x2) func vst2_u32*(p: pointer, v: uint32x2x2) @@ -396,6 +403,9 @@ func vextq_f64*(a, b: float64x2, n: int): float64x2 func vminvq_u32*(a: uint32x4): uint32 func vminvq_f32*(a: float32x4): float32 +func vminvq_u8*(a: uint8x16): uint8 + +func vminv_u8*(a: uint8x8): uint8 func vmaxq_u8*(a, b: uint8x16): uint8x16 @@ -451,6 +461,10 @@ func vreinterpretq_s32_u8*(a: uint8x16): int32x4 func vreinterpretq_s32_u32*(a: uint32x4): int32x4 func vreinterpretq_u16_u8*(a: uint8x16): uint16x8 +func vreinterpret_u32_u8*(a: uint8x8): uint32x2 +func vreinterpret_u8_u16*(a: uint16x4): uint8x8 +func vreinterpret_u16_u8*(a: uint8x8): uint16x4 +func vreinterpret_u8_u32*(a: uint32x2): uint8x8 func vreinterpret_u64_u8*(a: uint8x8): uint64x1 func vreinterpret_s32_u32*(a: uint32x2): int32x2 func vreinterpret_f32_u64*(a: uint64x1): float32x2 @@ -464,4 +478,7 @@ func uint32x4_immediate*(v3, v2, v1, v0: static uint32): uint32x4 {.inline.} = {.emit: [result, " = (uint32x4_t){", v0, ",", v1, ",", v2, ",", v3, "};"].} func uint8x16_immediate*(v15, v14, v13, v12, v11, v10, v9, v8, v7, v6, v5, v4, v3, v2, v1, v0: static uint8): uint8x16 {.inline.} = - {.emit: [result, " = (uint8x16_t){", v0, ",", v1, ",", v2, ",", v3, ",", v4, ",", v5, ",", v6, ",", v7, ",", v8, ",", v9, ",", v10, ",", v11, ",", v12, ",", v13, ",", v14, ",", v15, "};"].} + {.emit: [result, " = (uint8x16_t){", v0, ",", v1, ",", v2, ",", v3, ",", v4, ",", v5, ",", v6, ",", v7, ",", v8, ",", v9, ",", v10, ",", v11, ",", v12, ",", v13, ",", v14, ",", v15, "};"].} + +func uint8x8_immediate*(v7, v6, v5, v4, v3, v2, v1, v0: static uint8): uint8x8 {.inline.} = + {.emit: [result, " = (uint8x8_t){", v0, ",", v1, ",", v2, ",", v3, ",", v4, ",", v5, ",", v6, ",", v7, "};"].} diff --git a/src/nimsimd/sse2.nim b/src/nimsimd/sse2.nim index 1f00539..2612bd5 100644 --- a/src/nimsimd/sse2.nim +++ b/src/nimsimd/sse2.nim @@ -509,7 +509,7 @@ func mm_set_epi32*(a, b, c, d: int32 | uint32): M128i {.importc: 
"_mm_set_epi32" func mm_set_epi64x*(a, b: int64 | uint64): M128i {.importc: "_mm_set_epi64x".} -func mm_set_epi8*(a, b, c, d, e, f, g, h, i, j, k, l, m, n, o, p: int8 | uint8): M128i {.importc: "_mm_set_epi8".} +func mm_set_epi8*(a, b, c, d, e, f, g, h, i, j, k, l, m, n, o, p: int8 | uint8 | char): M128i {.importc: "_mm_set_epi8".} func mm_set_pd*(a, b: float64): M128d {.importc: "_mm_set_pd".} @@ -523,7 +523,7 @@ func mm_set1_epi32*(a: int32 | uint32): M128i {.importc: "_mm_set1_epi32".} func mm_set1_epi64x*(a: int64 | uint64): M128i {.importc: "_mm_set1_epi64x".} -func mm_set1_epi8*(a: int8 | uint8): M128i {.importc: "_mm_set1_epi8".} +func mm_set1_epi8*(a: int8 | uint8 | char): M128i {.importc: "_mm_set1_epi8".} func mm_set1_pd*(a: float64): M128d {.importc: "_mm_set1_pd".} @@ -531,7 +531,7 @@ func mm_setr_epi16*(a, b, c, d, e, f, g, h: int16): M128i {.importc: "_mm_setr_e func mm_setr_epi32*(a, b, c, d: int32 | uint32): M128i {.importc: "_mm_setr_epi32".} -func mm_setr_epi8*(a, b, c, d, e, f, g, h, i, j, k, l, m, n, o, p: uint8): M128i {.importc: "_mm_setr_epi8".} +func mm_setr_epi8*(a, b, c, d, e, f, g, h, i, j, k, l, m, n, o, p: int8 | uint8 | char): M128i {.importc: "_mm_setr_epi8".} func mm_setr_pd*(a, b: float64): M128d {.importc: "_mm_setr_pd".}