31template <simd::simd_concept simd_t>
37template <simd::simd_concept simd_t>
38constexpr void store_avx512(
void * mem_addr, simd_t
const & simd_vec);
43template <simd::simd_concept simd_t>
49template <simd::simd_concept target_simd_t, simd::simd_concept source_simd_t>
55template <simd::simd_concept target_simd_t, simd::simd_concept source_simd_t>
61template <u
int8_t index, simd::simd_concept simd_t>
67template <u
int8_t index, simd::simd_concept simd_t>
73template <u
int8_t index, simd::simd_concept simd_t>
87template <simd::simd_concept simd_t>
90 return reinterpret_cast<simd_t
>(_mm512_loadu_si512(mem_addr));
93template <simd::simd_concept simd_t>
94constexpr void store_avx512(
void * mem_addr, simd_t
const & simd_vec)
96 _mm512_storeu_si512(mem_addr,
reinterpret_cast<__m512i
const &
>(simd_vec));
# if defined(__AVX512BW__)
// NOTE(review): Doxygen-extracted listing — the function signature
// (void transpose_matrix_avx512(std::array<simd_t, simd_traits<simd_t>::length> & matrix)
// per the index), the declarations of the tmp1/tmp2 scratch arrays (64 elements
// of __m512i each, judging by the `i + 32` indexing below), and the
// lo_mask/hi_mask permute constants are missing from this view — confirm
// against the original header before relying on this block.
100template <simd::simd_concept simd_t>
// Stage 1: interleave adjacent rows at 8-bit granularity (requires AVX512BW).
118 for (
int i = 0; i < 32; ++i)
120 tmp1[i] = _mm512_unpacklo_epi8(
reinterpret_cast<__m512i
const &
>(
matrix[2 * i]),
121 reinterpret_cast<__m512i
const &
>(
matrix[2 * i + 1]));
122 tmp1[i + 32] = _mm512_unpackhi_epi8(
reinterpret_cast<__m512i
const &
>(
matrix[2 * i]),
123 reinterpret_cast<__m512i
const &
>(
matrix[2 * i + 1]));
// Stage 2: interleave the stage-1 results at 16-bit granularity.
128 for (
int i = 0; i < 32; ++i)
130 tmp2[i] = _mm512_unpacklo_epi16(tmp1[2 * i], tmp1[2 * i + 1]);
131 tmp2[i + 32] = _mm512_unpackhi_epi16(tmp1[2 * i], tmp1[2 * i + 1]);
// Stage 3: 32-bit interleave, ping-ponging between tmp1 and tmp2.
134 for (
int i = 0; i < 32; ++i)
136 tmp1[i] = _mm512_unpacklo_epi32(tmp2[2 * i], tmp2[2 * i + 1]);
137 tmp1[i + 32] = _mm512_unpackhi_epi32(tmp2[2 * i], tmp2[2 * i + 1]);
// Stage 4: 64-bit interleave.
140 for (
int i = 0; i < 32; ++i)
142 tmp2[i] = _mm512_unpacklo_epi64(tmp1[2 * i], tmp1[2 * i + 1]);
143 tmp2[i + 32] = _mm512_unpackhi_epi64(tmp1[2 * i], tmp1[2 * i + 1]);
// Stage 5: there is no native 128-bit unpack, so emulate it with
// _mm512_permutex2var_epi64 driven by the (not visible here) lo_mask/hi_mask
// index constants.
148 auto _mm512_unpacklo_epi128 = [](__m512i
const & a, __m512i
const & b)
151 return _mm512_permutex2var_epi64(a,
reinterpret_cast<__m512i
const &
>(*lo_mask.data()), b);
// NOTE(review): '_mm521_' below looks like a typo for '_mm512_', but since it
// is only the name of a local lambda it is harmless.
154 auto _mm521_unpackhi_epi128 = [](__m512i
const & a, __m512i
const & b)
157 return _mm512_permutex2var_epi64(a,
reinterpret_cast<__m512i
const &
>(*hi_mask.data()), b);
160 for (
int i = 0; i < 32; ++i)
162 tmp1[i] = _mm512_unpacklo_epi128(tmp2[2 * i], tmp2[2 * i + 1]);
163 tmp1[i + 32] = _mm521_unpackhi_epi128(tmp2[2 * i], tmp2[2 * i + 1]);
// Stage 6: 256-bit interleave via _mm512_shuffle_i64x2 lane shuffles
// (0b0100'0100 selects the low 256-bit halves, 0b1110'1110 the high halves).
167 auto _mm512_unpacklo_epi256 = [](__m512i
const & a, __m512i
const & b)
169 return _mm512_shuffle_i64x2(a, b, 0b0100'0100);
172 auto _mm521_unpackhi_epi256 = [](__m512i
const & a, __m512i
const & b)
174 return _mm512_shuffle_i64x2(a, b, 0b1110'1110);
// Bit-reversal permutation table mapping intermediate row positions back to
// their transposed destinations (declaration line missing from this view).
182 0, 16, 8, 24, 4, 20, 12, 28, 2, 18, 10, 26, 6, 22, 14, 30,
184 1, 17, 9, 25, 5, 21, 13, 29, 3, 19, 11, 27, 7, 23, 15, 31,
186 32, 48, 40, 56, 36, 52, 44, 60, 34, 50, 42, 58, 38, 54, 46, 62,
188 33, 49, 41, 57, 37, 53, 45, 61, 35, 51, 43, 59, 39, 55, 47, 63};
// Final stage: write the fully interleaved rows back into `matrix`, scattered
// through the reverse-index table to complete the transpose in place.
191 for (
int i = 0; i < 32; ++i)
193 int const idx = i * 2;
194 matrix[reverse_idx_mask[idx]] =
reinterpret_cast<simd_t
>(_mm512_unpacklo_epi256(tmp1[idx], tmp1[idx + 1]));
195 matrix[reverse_idx_mask[idx + 1]] =
reinterpret_cast<simd_t
>(_mm521_unpackhi_epi256(tmp1[idx], tmp1[idx + 1]));
200template <simd::simd_concept target_simd_t, simd::simd_concept source_simd_t>
203 __m512i
const & tmp =
reinterpret_cast<__m512i
const &
>(src);
204 if constexpr (simd_traits<source_simd_t>::length == 64)
206 if constexpr (simd_traits<target_simd_t>::length == 32)
207 return reinterpret_cast<target_simd_t
>(_mm512_cvtepi8_epi16(_mm512_castsi512_si256(tmp)));
208 if constexpr (simd_traits<target_simd_t>::length == 16)
209 return reinterpret_cast<target_simd_t
>(_mm512_cvtepi8_epi32(_mm512_castsi512_si128(tmp)));
210 if constexpr (simd_traits<target_simd_t>::length == 8)
211 return reinterpret_cast<target_simd_t
>(_mm512_cvtepi8_epi64(_mm512_castsi512_si128(tmp)));
213 else if constexpr (simd_traits<source_simd_t>::length == 32)
215 if constexpr (simd_traits<target_simd_t>::length == 16)
216 return reinterpret_cast<target_simd_t
>(_mm512_cvtepi16_epi32(_mm512_castsi512_si256(tmp)));
217 if constexpr (simd_traits<target_simd_t>::length == 8)
218 return reinterpret_cast<target_simd_t
>(_mm512_cvtepi16_epi64(_mm512_castsi512_si128(tmp)));
222 static_assert(simd_traits<source_simd_t>::length == 16,
"Expected 32 bit scalar type.");
223 return reinterpret_cast<target_simd_t
>(_mm512_cvtepi32_epi64(_mm512_castsi512_si256(tmp)));
227template <simd::simd_concept target_simd_t, simd::simd_concept source_simd_t>
230 __m512i
const & tmp =
reinterpret_cast<__m512i
const &
>(src);
231 if constexpr (simd_traits<source_simd_t>::length == 64)
233 if constexpr (simd_traits<target_simd_t>::length == 32)
234 return reinterpret_cast<target_simd_t
>(_mm512_cvtepu8_epi16(_mm512_castsi512_si256(tmp)));
235 if constexpr (simd_traits<target_simd_t>::length == 16)
236 return reinterpret_cast<target_simd_t
>(_mm512_cvtepu8_epi32(_mm512_castsi512_si128(tmp)));
237 if constexpr (simd_traits<target_simd_t>::length == 8)
238 return reinterpret_cast<target_simd_t
>(_mm512_cvtepu8_epi64(_mm512_castsi512_si128(tmp)));
240 else if constexpr (simd_traits<source_simd_t>::length == 32)
242 if constexpr (simd_traits<target_simd_t>::length == 16)
243 return reinterpret_cast<target_simd_t
>(_mm512_cvtepu16_epi32(_mm512_castsi512_si256(tmp)));
244 if constexpr (simd_traits<target_simd_t>::length == 8)
245 return reinterpret_cast<target_simd_t
>(_mm512_cvtepu16_epi64(_mm512_castsi512_si128(tmp)));
249 static_assert(simd_traits<source_simd_t>::length == 16,
"Expected 32 bit scalar type.");
250 return reinterpret_cast<target_simd_t
>(_mm512_cvtepu32_epi64(_mm512_castsi512_si256(tmp)));
254template <u
int8_t index, simd::simd_concept simd_t>
257 return reinterpret_cast<simd_t
>(
258 _mm512_castsi256_si512(_mm512_extracti64x4_epi64(
reinterpret_cast<__m512i
const &
>(src), index)));
261# if defined(__AVX512DQ__)
262template <u
int8_t index, simd::simd_concept simd_t>
265 return reinterpret_cast<simd_t
>(
266 _mm512_castsi128_si512(_mm512_extracti64x2_epi64(
reinterpret_cast<__m512i
const &
>(src), index)));
269template <u
int8_t index, simd::simd_concept simd_t>
272 __m512i tmp =
reinterpret_cast<__m512i
const &
>(src);
275 if constexpr (index % 2 == 1)
276 tmp = _mm512_shuffle_epi32(tmp, 0b0100'1110);
278 return reinterpret_cast<simd_t
>(_mm512_castsi128_si512(_mm512_extracti64x2_epi64(tmp, index / 2)));
Provides seqan3::detail::builtin_simd, seqan3::detail::is_builtin_simd and seqan3::simd::simd_traits<builtin_simd_t>.
Provides intrinsics include for builtin simd.
Defines the requirements of a matrix (e.g. score matrices, trace matrices).
The internal SeqAn3 namespace.
Definition: aligned_sequence_concept.hpp:29
void transpose_matrix_avx512(std::array< simd_t, simd_traits< simd_t >::length > &matrix)
Transposes the given simd vector matrix.
constexpr simd_t extract_half_avx512(simd_t const &src)
Extracts one half of the given simd vector and stores the result in the lower half of the target vector.
constexpr simd_t load_avx512(void const *mem_addr)
Load simd_t size bits of integral data from memory.
constexpr target_simd_t upcast_signed_avx512(source_simd_t const &src)
Upcasts the given vector into the target vector using signed extension of packed values.
constexpr void store_avx512(void *mem_addr, simd_t const &simd_vec)
Store simd_t size bits of integral data into memory.
constexpr simd_t extract_quarter_avx512(simd_t const &src)
Extracts one quarter of the given simd vector and stores it in the lower quarter of the target vector.
constexpr target_simd_t upcast_unsigned_avx512(source_simd_t const &src)
Upcasts the given vector into the target vector using unsigned extension of packed values.
constexpr simd_t extract_eighth_avx512(simd_t const &src)
Extracts one eighth of the given simd vector and stores it in the lower eighth of the target vector.
Provides seqan3::simd::simd_traits.
seqan3::simd::simd_traits is the trait class that provides a uniform interface to the properties of simd types.
Definition: simd_traits.hpp:41
Provides seqan3::simd::simd_concept.