libstdc++
simd.h
1 // Definition of the public simd interfaces -*- C++ -*-
2 
3 // Copyright (C) 2020-2023 Free Software Foundation, Inc.
4 //
5 // This file is part of the GNU ISO C++ Library. This library is free
6 // software; you can redistribute it and/or modify it under the
7 // terms of the GNU General Public License as published by the
8 // Free Software Foundation; either version 3, or (at your option)
9 // any later version.
10 
11 // This library is distributed in the hope that it will be useful,
12 // but WITHOUT ANY WARRANTY; without even the implied warranty of
13 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 // GNU General Public License for more details.
15 
16 // Under Section 7 of GPL version 3, you are granted additional
17 // permissions described in the GCC Runtime Library Exception, version
18 // 3.1, as published by the Free Software Foundation.
19 
20 // You should have received a copy of the GNU General Public License and
21 // a copy of the GCC Runtime Library Exception along with this program;
22 // see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
23 // <http://www.gnu.org/licenses/>.
24 
25 #ifndef _GLIBCXX_EXPERIMENTAL_SIMD_H
26 #define _GLIBCXX_EXPERIMENTAL_SIMD_H
27 
28 #if __cplusplus >= 201703L
29 
30 #include "simd_detail.h"
31 #include "numeric_traits.h"
32 #include <bit>
33 #include <bitset>
34 #ifdef _GLIBCXX_DEBUG_UB
35 #include <cstdio> // for stderr
36 #endif
37 #include <cstring>
38 #include <cmath>
39 #include <functional>
40 #include <iosfwd>
41 #include <utility>
42 
43 #if _GLIBCXX_SIMD_X86INTRIN
44 #include <x86intrin.h>
45 #elif _GLIBCXX_SIMD_HAVE_NEON
46 #include <arm_neon.h>
47 #endif
48 
49 /** @ingroup ts_simd
50  * @{
51  */
52 /* There are several closely related types, with the following naming
53  * convention:
54  * _Tp: vectorizable (arithmetic) type (or any type)
55  * _TV: __vector_type_t<_Tp, _Np>
56  * _TW: _SimdWrapper<_Tp, _Np>
57  * _TI: __intrinsic_type_t<_Tp, _Np>
58  * _TVT: _VectorTraits<_TV> or _VectorTraits<_TW>
59  * If one additional type is needed use _U instead of _T.
60  * Otherwise use _T\d, _TV\d, _TW\d, _TI\d, _TVT\d.
61  *
62  * More naming conventions:
63  * _Ap or _Abi: An ABI tag from the simd_abi namespace
64  * _Ip: often used for integer types with sizeof(_Ip) == sizeof(_Tp),
65  * _IV, _IW as for _TV, _TW
66  * _Np: number of elements (not bytes)
67  * _Bytes: number of bytes
68  *
69  * Variable names:
70  * __k: mask object (vector- or bitmask)
71  */
72 _GLIBCXX_SIMD_BEGIN_NAMESPACE
73 
#if !_GLIBCXX_SIMD_X86INTRIN
// <x86intrin.h> is not included in this configuration, so the x86 intrinsic
// vector type names are declared here as GNU vector types of the same byte
// size (16, 32 and 64 bytes).

// float vectors
using __m128 [[__gnu__::__vector_size__(16)]] = float;
using __m256 [[__gnu__::__vector_size__(32)]] = float;
using __m512 [[__gnu__::__vector_size__(64)]] = float;

// double vectors
using __m128d [[__gnu__::__vector_size__(16)]] = double;
using __m256d [[__gnu__::__vector_size__(32)]] = double;
using __m512d [[__gnu__::__vector_size__(64)]] = double;

// integer vectors (element type long long)
using __m128i [[__gnu__::__vector_size__(16)]] = long long;
using __m256i [[__gnu__::__vector_size__(32)]] = long long;
using __m512i [[__gnu__::__vector_size__(64)]] = long long;
#endif
85 
namespace simd_abi {
// simd_abi forward declarations {{{
// implementation details:

// ABI tag; exposed as simd_abi::scalar through the alias below.
struct _Scalar;

// ABI tag template; exposed as simd_abi::fixed_size<_Np> through the alias
// below.
template <int _Np>
  struct _Fixed;

// There are two major ABIs that appear on different architectures.
// Both have non-boolean values packed into an N Byte register
// -> #elements = N / sizeof(T)
// Masks differ:
// 1. Use value vector registers for masks (all 0 or all 1)
// 2. Use bitmasks (mask registers) with one bit per value in the corresponding
//    value vector
//
// Both can be partially used, masking off the rest when doing horizontal
// operations or operations that can trap (e.g. FP_INVALID or integer division
// by 0). This is encoded as the number of used bytes.
template <int _UsedBytes>
  struct _VecBuiltin;

template <int _UsedBytes>
  struct _VecBltnBtmsk;

// _VecBuiltin ABI sized for _Np elements of type _Tp.
template <typename _Tp, int _Np>
  using _VecN = _VecBuiltin<sizeof(_Tp) * _Np>;

// Architecture-flavoured names. The default argument is the full register
// width in bytes.
template <int _Bytes = 16>
  using _Sse = _VecBuiltin<_Bytes>;

template <int _Bytes = 32>
  using _Avx = _VecBuiltin<_Bytes>;

template <int _Bytes = 64>
  using _Avx512 = _VecBltnBtmsk<_Bytes>;

template <int _Bytes = 16>
  using _Neon = _VecBuiltin<_Bytes>;

// implementation-defined:
using __sse = _Sse<16>;
using __avx = _Avx<32>;
using __avx512 = _Avx512<64>;
using __neon = _Neon<16>;
using __neon128 = _Neon<16>;
using __neon64 = _Neon<8>;

// standard:
template <typename _Tp, size_t _Np, typename...>
  struct deduce;

template <int _Np>
  using fixed_size = _Fixed<_Np>;

using scalar = _Scalar;

// }}}
} // namespace simd_abi
145 // forward declarations is_simd(_mask), simd(_mask), simd_size {{{
// Traits; only declared at this point.
template <typename _Tp> struct is_simd;
template <typename _Tp> struct is_simd_mask;
template <typename _Tp, typename _Abi> struct simd_size;

// The data-parallel vector and mask class templates; only declared at this
// point.
template <typename _Tp, typename _Abi> class simd;
template <typename _Tp, typename _Abi> class simd_mask;
160 
161 // }}}
162 // load/store flags {{{
163 struct element_aligned_tag
164 {
165  template <typename _Tp, typename _Up = typename _Tp::value_type>
166  static constexpr size_t _S_alignment = alignof(_Up);
167 
168  template <typename _Tp, typename _Up>
169  _GLIBCXX_SIMD_INTRINSIC static constexpr _Up*
170  _S_apply(_Up* __ptr)
171  { return __ptr; }
172 };
173 
174 struct vector_aligned_tag
175 {
176  template <typename _Tp, typename _Up = typename _Tp::value_type>
177  static constexpr size_t _S_alignment
178  = std::__bit_ceil(sizeof(_Up) * _Tp::size());
179 
180  template <typename _Tp, typename _Up>
181  _GLIBCXX_SIMD_INTRINSIC static constexpr _Up*
182  _S_apply(_Up* __ptr)
183  { return static_cast<_Up*>(__builtin_assume_aligned(__ptr, _S_alignment<_Tp, _Up>)); }
184 };
185 
186 template <size_t _Np> struct overaligned_tag
187 {
188  template <typename _Tp, typename _Up = typename _Tp::value_type>
189  static constexpr size_t _S_alignment = _Np;
190 
191  template <typename _Tp, typename _Up>
192  _GLIBCXX_SIMD_INTRINSIC static constexpr _Up*
193  _S_apply(_Up* __ptr)
194  { return static_cast<_Up*>(__builtin_assume_aligned(__ptr, _Np)); }
195 };
196 
197 inline constexpr element_aligned_tag element_aligned = {};
198 
199 inline constexpr vector_aligned_tag vector_aligned = {};
200 
201 template <size_t _Np>
202  inline constexpr overaligned_tag<_Np> overaligned = {};
203 
204 // }}}
205 template <size_t _Xp>
206  using _SizeConstant = integral_constant<size_t, _Xp>;
207 // constexpr feature detection{{{
208 constexpr inline bool __have_mmx = _GLIBCXX_SIMD_HAVE_MMX;
209 constexpr inline bool __have_sse = _GLIBCXX_SIMD_HAVE_SSE;
210 constexpr inline bool __have_sse2 = _GLIBCXX_SIMD_HAVE_SSE2;
211 constexpr inline bool __have_sse3 = _GLIBCXX_SIMD_HAVE_SSE3;
212 constexpr inline bool __have_ssse3 = _GLIBCXX_SIMD_HAVE_SSSE3;
213 constexpr inline bool __have_sse4_1 = _GLIBCXX_SIMD_HAVE_SSE4_1;
214 constexpr inline bool __have_sse4_2 = _GLIBCXX_SIMD_HAVE_SSE4_2;
215 constexpr inline bool __have_xop = _GLIBCXX_SIMD_HAVE_XOP;
216 constexpr inline bool __have_avx = _GLIBCXX_SIMD_HAVE_AVX;
217 constexpr inline bool __have_avx2 = _GLIBCXX_SIMD_HAVE_AVX2;
218 constexpr inline bool __have_bmi = _GLIBCXX_SIMD_HAVE_BMI1;
219 constexpr inline bool __have_bmi2 = _GLIBCXX_SIMD_HAVE_BMI2;
220 constexpr inline bool __have_lzcnt = _GLIBCXX_SIMD_HAVE_LZCNT;
221 constexpr inline bool __have_sse4a = _GLIBCXX_SIMD_HAVE_SSE4A;
222 constexpr inline bool __have_fma = _GLIBCXX_SIMD_HAVE_FMA;
223 constexpr inline bool __have_fma4 = _GLIBCXX_SIMD_HAVE_FMA4;
224 constexpr inline bool __have_f16c = _GLIBCXX_SIMD_HAVE_F16C;
225 constexpr inline bool __have_popcnt = _GLIBCXX_SIMD_HAVE_POPCNT;
226 constexpr inline bool __have_avx512f = _GLIBCXX_SIMD_HAVE_AVX512F;
227 constexpr inline bool __have_avx512dq = _GLIBCXX_SIMD_HAVE_AVX512DQ;
228 constexpr inline bool __have_avx512vl = _GLIBCXX_SIMD_HAVE_AVX512VL;
229 constexpr inline bool __have_avx512bw = _GLIBCXX_SIMD_HAVE_AVX512BW;
230 constexpr inline bool __have_avx512dq_vl = __have_avx512dq && __have_avx512vl;
231 constexpr inline bool __have_avx512bw_vl = __have_avx512bw && __have_avx512vl;
232 constexpr inline bool __have_avx512bitalg = _GLIBCXX_SIMD_HAVE_AVX512BITALG;
233 constexpr inline bool __have_avx512vbmi2 = _GLIBCXX_SIMD_HAVE_AVX512VBMI2;
234 constexpr inline bool __have_avx512vbmi = _GLIBCXX_SIMD_HAVE_AVX512VBMI;
235 constexpr inline bool __have_avx512ifma = _GLIBCXX_SIMD_HAVE_AVX512IFMA;
236 constexpr inline bool __have_avx512cd = _GLIBCXX_SIMD_HAVE_AVX512CD;
237 constexpr inline bool __have_avx512vnni = _GLIBCXX_SIMD_HAVE_AVX512VNNI;
238 constexpr inline bool __have_avx512vpopcntdq = _GLIBCXX_SIMD_HAVE_AVX512VPOPCNTDQ;
239 constexpr inline bool __have_avx512vp2intersect = _GLIBCXX_SIMD_HAVE_AVX512VP2INTERSECT;
240 
241 constexpr inline bool __have_neon = _GLIBCXX_SIMD_HAVE_NEON;
242 constexpr inline bool __have_neon_a32 = _GLIBCXX_SIMD_HAVE_NEON_A32;
243 constexpr inline bool __have_neon_a64 = _GLIBCXX_SIMD_HAVE_NEON_A64;
244 constexpr inline bool __support_neon_float =
245 #if defined __GCC_IEC_559
246  __GCC_IEC_559 == 0;
247 #elif defined __FAST_MATH__
248  true;
249 #else
250  false;
251 #endif
252 
// POWER vector facilities. Power10 and Power9 are detected purely from their
// predefined macros. Each older facility is true if its own macro is defined,
// and otherwise takes the value of the next newer facility:
// power9vec -> power8vec -> vsx -> vmx.
inline constexpr bool __have_power10vec =
#ifdef _ARCH_PWR10
  true;
#else
  false;
#endif

inline constexpr bool __have_power9vec =
#ifdef __POWER9_VECTOR__
  true;
#else
  false;
#endif

inline constexpr bool __have_power8vec =
#if defined __POWER8_VECTOR__
  true;
#else
  __have_power9vec;
#endif

inline constexpr bool __have_power_vsx =
#if defined __VSX__
  true;
#else
  __have_power8vec;
#endif

inline constexpr bool __have_power_vmx =
#if defined __ALTIVEC__
  true;
#else
  __have_power_vsx;
#endif
278 
279 // }}}
280 
281 namespace __detail
282 {
#ifdef math_errhandling
  // Preferred overload (exact match for an int argument): derives the answer
  // from math_errhandling. It only participates if math_errhandling is usable
  // as a constant expression (it is the default template argument);
  // math_errhandling may expand to an extern symbol, in which case a constexpr
  // value must be guessed by the overload below.
  template <int = math_errhandling>
    constexpr bool
    __handle_fpexcept_impl(int)
    { return (math_errhandling & MATH_ERREXCEPT) != 0; }
#endif

  // Fallback overload (needs an int -> float conversion): with fast-math
  // assume floating-point exceptions are ignored, otherwise implement correct
  // exception behavior.
  constexpr bool
  __handle_fpexcept_impl(float)
  {
#ifndef __FAST_MATH__
    return true;
#else
    return false;
#endif
  }

  /// True if math functions must raise floating-point exceptions as specified by C17.
  static constexpr bool _S_handle_fpexcept = __handle_fpexcept_impl(0);
307 
308  constexpr std::uint_least64_t
309  __floating_point_flags()
310  {
311  std::uint_least64_t __flags = 0;
312  if constexpr (_S_handle_fpexcept)
313  __flags |= 1;
314 #ifdef __FAST_MATH__
315  __flags |= 1 << 1;
316 #elif __FINITE_MATH_ONLY__
317  __flags |= 2 << 1;
318 #elif __GCC_IEC_559 < 2
319  __flags |= 3 << 1;
320 #endif
321  __flags |= (__FLT_EVAL_METHOD__ + 1) << 3;
322  return __flags;
323  }
324 
325  constexpr std::uint_least64_t
326  __machine_flags()
327  {
328  if constexpr (__have_mmx || __have_sse)
329  return __have_mmx
330  | (__have_sse << 1)
331  | (__have_sse2 << 2)
332  | (__have_sse3 << 3)
333  | (__have_ssse3 << 4)
334  | (__have_sse4_1 << 5)
335  | (__have_sse4_2 << 6)
336  | (__have_xop << 7)
337  | (__have_avx << 8)
338  | (__have_avx2 << 9)
339  | (__have_bmi << 10)
340  | (__have_bmi2 << 11)
341  | (__have_lzcnt << 12)
342  | (__have_sse4a << 13)
343  | (__have_fma << 14)
344  | (__have_fma4 << 15)
345  | (__have_f16c << 16)
346  | (__have_popcnt << 17)
347  | (__have_avx512f << 18)
348  | (__have_avx512dq << 19)
349  | (__have_avx512vl << 20)
350  | (__have_avx512bw << 21)
351  | (__have_avx512bitalg << 22)
352  | (__have_avx512vbmi2 << 23)
353  | (__have_avx512vbmi << 24)
354  | (__have_avx512ifma << 25)
355  | (__have_avx512cd << 26)
356  | (__have_avx512vnni << 27)
357  | (__have_avx512vpopcntdq << 28)
358  | (__have_avx512vp2intersect << 29);
359  else if constexpr (__have_neon)
360  return __have_neon
361  | (__have_neon_a32 << 1)
362  | (__have_neon_a64 << 2)
363  | (__have_neon_a64 << 2)
364  | (__support_neon_float << 3);
365  else if constexpr (__have_power_vmx)
366  return __have_power_vmx
367  | (__have_power_vsx << 1)
368  | (__have_power8vec << 2)
369  | (__have_power9vec << 3)
370  | (__have_power10vec << 4);
371  else
372  return 0;
373  }
374 
375  namespace
376  {
377  struct _OdrEnforcer {};
378  }
379 
380  template <std::uint_least64_t...>
381  struct _MachineFlagsTemplate {};
382 
383  /**@internal
384  * Use this type as default template argument to all function templates that
385  * are not declared always_inline. It ensures, that a function
386  * specialization, which the compiler decides not to inline, has a unique symbol
387  * (_OdrEnforcer) or a symbol matching the machine/architecture flags
388  * (_MachineFlagsTemplate). This helps to avoid ODR violations in cases where
389  * users link TUs compiled with different flags. This is especially important
390  * for using simd in libraries.
391  */
392  using __odr_helper
393  = conditional_t<__machine_flags() == 0, _OdrEnforcer,
394  _MachineFlagsTemplate<__machine_flags(), __floating_point_flags()>>;
395 
396  struct _Minimum
397  {
398  template <typename _Tp>
399  _GLIBCXX_SIMD_INTRINSIC constexpr
400  _Tp
401  operator()(_Tp __a, _Tp __b) const
402  {
403  using std::min;
404  return min(__a, __b);
405  }
406  };
407 
408  struct _Maximum
409  {
410  template <typename _Tp>
411  _GLIBCXX_SIMD_INTRINSIC constexpr
412  _Tp
413  operator()(_Tp __a, _Tp __b) const
414  {
415  using std::max;
416  return max(__a, __b);
417  }
418  };
419 } // namespace __detail
420 
421 // unrolled/pack execution helpers
422 // __execute_n_times{{{
423 template <typename _Fp, size_t... _I>
424  [[__gnu__::__flatten__]] _GLIBCXX_SIMD_INTRINSIC constexpr
425  void
426  __execute_on_index_sequence(_Fp&& __f, index_sequence<_I...>)
427  { ((void)__f(_SizeConstant<_I>()), ...); }
428 
429 template <typename _Fp>
430  _GLIBCXX_SIMD_INTRINSIC constexpr void
431  __execute_on_index_sequence(_Fp&&, index_sequence<>)
432  { }
433 
434 template <size_t _Np, typename _Fp>
435  _GLIBCXX_SIMD_INTRINSIC constexpr void
436  __execute_n_times(_Fp&& __f)
437  {
438  __execute_on_index_sequence(static_cast<_Fp&&>(__f),
439  make_index_sequence<_Np>{});
440  }
441 
442 // }}}
443 // __generate_from_n_evaluations{{{
444 template <typename _R, typename _Fp, size_t... _I>
445  [[__gnu__::__flatten__]] _GLIBCXX_SIMD_INTRINSIC constexpr
446  _R
447  __execute_on_index_sequence_with_return(_Fp&& __f, index_sequence<_I...>)
448  { return _R{__f(_SizeConstant<_I>())...}; }
449 
450 template <size_t _Np, typename _R, typename _Fp>
451  _GLIBCXX_SIMD_INTRINSIC constexpr _R
452  __generate_from_n_evaluations(_Fp&& __f)
453  {
454  return __execute_on_index_sequence_with_return<_R>(
455  static_cast<_Fp&&>(__f), make_index_sequence<_Np>{});
456  }
457 
458 // }}}
459 // __call_with_n_evaluations{{{
460 template <size_t... _I, typename _F0, typename _FArgs>
461  [[__gnu__::__flatten__]] _GLIBCXX_SIMD_INTRINSIC constexpr
462  auto
463  __call_with_n_evaluations(index_sequence<_I...>, _F0&& __f0, _FArgs&& __fargs)
464  { return __f0(__fargs(_SizeConstant<_I>())...); }
465 
466 template <size_t _Np, typename _F0, typename _FArgs>
467  _GLIBCXX_SIMD_INTRINSIC constexpr auto
468  __call_with_n_evaluations(_F0&& __f0, _FArgs&& __fargs)
469  {
470  return __call_with_n_evaluations(make_index_sequence<_Np>{},
471  static_cast<_F0&&>(__f0),
472  static_cast<_FArgs&&>(__fargs));
473  }
474 
475 // }}}
476 // __call_with_subscripts{{{
477 template <size_t _First = 0, size_t... _It, typename _Tp, typename _Fp>
478  [[__gnu__::__flatten__]] _GLIBCXX_SIMD_INTRINSIC constexpr
479  auto
480  __call_with_subscripts(_Tp&& __x, index_sequence<_It...>, _Fp&& __fun)
481  { return __fun(__x[_First + _It]...); }
482 
483 template <size_t _Np, size_t _First = 0, typename _Tp, typename _Fp>
484  _GLIBCXX_SIMD_INTRINSIC constexpr auto
485  __call_with_subscripts(_Tp&& __x, _Fp&& __fun)
486  {
487  return __call_with_subscripts<_First>(static_cast<_Tp&&>(__x),
488  make_index_sequence<_Np>(),
489  static_cast<_Fp&&>(__fun));
490  }
491 
492 // }}}
493 
494 // vvv ---- type traits ---- vvv
495 // integer type aliases{{{
// Short names for the multi-word builtin integer types.
// signed:
using _SChar = signed char;
using _LLong = long long;
// unsigned:
using _UChar = unsigned char;
using _UShort = unsigned short;
using _UInt = unsigned int;
using _ULong = unsigned long;
using _ULLong = unsigned long long;
503 
504 //}}}
505 // __first_of_pack{{{
// Exposes the first type of a non-empty type list as ::type.
template <typename _Head, typename... _Tail>
  struct __first_of_pack
  {
    using type = _Head;
  };

// Shorthand for typename __first_of_pack<_Ts...>::type.
template <typename... _Ts>
  using __first_of_pack_t = typename __first_of_pack<_Ts...>::type;
512 
513 //}}}
514 // __value_type_or_identity_t {{{
// Declaration-only overload pair used inside decltype. For an int argument
// the first overload is the better match, but it drops out of overload
// resolution when _Tp has no nested value_type.
template <typename _Tp>
  typename _Tp::value_type
  __value_type_or_identity_impl(int);

// Fallback (reached through an int -> float conversion): _Tp itself.
template <typename _Tp>
  _Tp
  __value_type_or_identity_impl(float);

// _Tp::value_type if that type exists, otherwise _Tp.
template <typename _Tp>
  using __value_type_or_identity_t
    = decltype(__value_type_or_identity_impl<_Tp>(0));
526 
527 // }}}
528 // __is_vectorizable {{{
529 template <typename _Tp>
530  struct __is_vectorizable : public is_arithmetic<_Tp> {};
531 
532 template <>
533  struct __is_vectorizable<bool> : public false_type {};
534 
535 template <typename _Tp>
536  inline constexpr bool __is_vectorizable_v = __is_vectorizable<_Tp>::value;
537 
538 // Deduces to a vectorizable type
539 template <typename _Tp, typename = enable_if_t<__is_vectorizable_v<_Tp>>>
540  using _Vectorizable = _Tp;
541 
542 // }}}
543 // _LoadStorePtr / __is_possible_loadstore_conversion {{{
544 template <typename _Ptr, typename _ValueType>
545  struct __is_possible_loadstore_conversion
546  : conjunction<__is_vectorizable<_Ptr>, __is_vectorizable<_ValueType>> {};
547 
548 template <>
549  struct __is_possible_loadstore_conversion<bool, bool> : true_type {};
550 
551 // Deduces to a type allowed for load/store with the given value type.
552 template <typename _Ptr, typename _ValueType,
553  typename = enable_if_t<
554  __is_possible_loadstore_conversion<_Ptr, _ValueType>::value>>
555  using _LoadStorePtr = _Ptr;
556 
557 // }}}
558 // __is_bitmask{{{
559 template <typename _Tp, typename = void_t<>>
560  struct __is_bitmask : false_type {};
561 
562 template <typename _Tp>
563  inline constexpr bool __is_bitmask_v = __is_bitmask<_Tp>::value;
564 
565 // the __mmaskXX case:
566 template <typename _Tp>
567  struct __is_bitmask<_Tp,
568  void_t<decltype(declval<unsigned&>() = declval<_Tp>() & 1u)>>
569  : true_type {};
570 
571 // }}}
572 // __int_for_sizeof{{{
573 #pragma GCC diagnostic push
574 #pragma GCC diagnostic ignored "-Wpedantic"
575 template <size_t _Bytes>
576  constexpr auto
577  __int_for_sizeof()
578  {
579  if constexpr (_Bytes == sizeof(int))
580  return int();
581  #ifdef __clang__
582  else if constexpr (_Bytes == sizeof(char))
583  return char();
584  #else
585  else if constexpr (_Bytes == sizeof(_SChar))
586  return _SChar();
587  #endif
588  else if constexpr (_Bytes == sizeof(short))
589  return short();
590  #ifndef __clang__
591  else if constexpr (_Bytes == sizeof(long))
592  return long();
593  #endif
594  else if constexpr (_Bytes == sizeof(_LLong))
595  return _LLong();
596  #ifdef __SIZEOF_INT128__
597  else if constexpr (_Bytes == sizeof(__int128))
598  return __int128();
599  #endif // __SIZEOF_INT128__
600  else if constexpr (_Bytes % sizeof(int) == 0)
601  {
602  constexpr size_t _Np = _Bytes / sizeof(int);
603  struct _Ip
604  {
605  int _M_data[_Np];
606 
607  _GLIBCXX_SIMD_INTRINSIC constexpr _Ip
608  operator&(_Ip __rhs) const
609  {
610  return __generate_from_n_evaluations<_Np, _Ip>(
611  [&](auto __i) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
612  return __rhs._M_data[__i] & _M_data[__i];
613  });
614  }
615 
616  _GLIBCXX_SIMD_INTRINSIC constexpr _Ip
617  operator|(_Ip __rhs) const
618  {
619  return __generate_from_n_evaluations<_Np, _Ip>(
620  [&](auto __i) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
621  return __rhs._M_data[__i] | _M_data[__i];
622  });
623  }
624 
625  _GLIBCXX_SIMD_INTRINSIC constexpr _Ip
626  operator^(_Ip __rhs) const
627  {
628  return __generate_from_n_evaluations<_Np, _Ip>(
629  [&](auto __i) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
630  return __rhs._M_data[__i] ^ _M_data[__i];
631  });
632  }
633 
634  _GLIBCXX_SIMD_INTRINSIC constexpr _Ip
635  operator~() const
636  {
637  return __generate_from_n_evaluations<_Np, _Ip>(
638  [&](auto __i) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA { return ~_M_data[__i]; });
639  }
640  };
641  return _Ip{};
642  }
643  else
644  static_assert(_Bytes != _Bytes, "this should be unreachable");
645  }
646 #pragma GCC diagnostic pop
647 
648 template <typename _Tp>
649  using __int_for_sizeof_t = decltype(__int_for_sizeof<sizeof(_Tp)>());
650 
651 template <size_t _Np>
652  using __int_with_sizeof_t = decltype(__int_for_sizeof<_Np>());
653 
654 // }}}
655 // __is_fixed_size_abi{{{
656 template <typename _Tp>
657  struct __is_fixed_size_abi : false_type {};
658 
659 template <int _Np>
660  struct __is_fixed_size_abi<simd_abi::fixed_size<_Np>> : true_type {};
661 
662 template <typename _Tp>
663  inline constexpr bool __is_fixed_size_abi_v = __is_fixed_size_abi<_Tp>::value;
664 
665 // }}}
666 // __is_scalar_abi {{{
667 template <typename _Abi>
668  constexpr bool
669  __is_scalar_abi()
670  { return is_same_v<simd_abi::scalar, _Abi>; }
671 
672 // }}}
673 // __abi_bytes_v {{{
// Fallback overload: the type is not a specialization of a template with a
// single int parameter.
template <typename _Tp>
  constexpr int
  __abi_bytes_impl(_Tp*)
  { return -1; }

// More specialized overload: extracts the int template argument of the
// pointee type.
template <template <int> class _Abi, int _Bytes>
  constexpr int
  __abi_bytes_impl(_Abi<_Bytes>*)
  { return _Bytes; }

// The int template argument of _Abi, or -1 if _Abi has none. The null pointer
// only serves as a carrier for the type; it is never dereferenced.
template <typename _Abi>
  inline constexpr int __abi_bytes_v
    = __abi_bytes_impl(static_cast<_Abi*>(nullptr));
687 
688 // }}}
689 // __is_builtin_bitmask_abi {{{
690 template <typename _Abi>
691  constexpr bool
692  __is_builtin_bitmask_abi()
693  { return is_same_v<simd_abi::_VecBltnBtmsk<__abi_bytes_v<_Abi>>, _Abi>; }
694 
695 // }}}
696 // __is_sse_abi {{{
697 template <typename _Abi>
698  constexpr bool
699  __is_sse_abi()
700  {
701  constexpr auto _Bytes = __abi_bytes_v<_Abi>;
702  return _Bytes <= 16 && is_same_v<simd_abi::_VecBuiltin<_Bytes>, _Abi>;
703  }
704 
705 // }}}
706 // __is_avx_abi {{{
707 template <typename _Abi>
708  constexpr bool
709  __is_avx_abi()
710  {
711  constexpr auto _Bytes = __abi_bytes_v<_Abi>;
712  return _Bytes > 16 && _Bytes <= 32
713  && is_same_v<simd_abi::_VecBuiltin<_Bytes>, _Abi>;
714  }
715 
716 // }}}
717 // __is_avx512_abi {{{
718 template <typename _Abi>
719  constexpr bool
720  __is_avx512_abi()
721  {
722  constexpr auto _Bytes = __abi_bytes_v<_Abi>;
723  return _Bytes <= 64 && is_same_v<simd_abi::_Avx512<_Bytes>, _Abi>;
724  }
725 
726 // }}}
727 // __is_neon_abi {{{
728 template <typename _Abi>
729  constexpr bool
730  __is_neon_abi()
731  {
732  constexpr auto _Bytes = __abi_bytes_v<_Abi>;
733  return _Bytes <= 16 && is_same_v<simd_abi::_VecBuiltin<_Bytes>, _Abi>;
734  }
735 
736 // }}}
737 // __make_dependent_t {{{
// Yields _Up while formally depending on the (otherwise ignored) first
// template argument.
template <typename, typename _Up>
  struct __make_dependent
  {
    using type = _Up;
  };

// Shorthand for typename __make_dependent<_Tp, _Up>::type.
template <typename _Tp, typename _Up>
  using __make_dependent_t = typename __make_dependent<_Tp, _Up>::type;
744 
745 // }}}
746 // ^^^ ---- type traits ---- ^^^
747 
748 // __invoke_ub{{{
749 template <typename... _Args>
750  [[noreturn]] _GLIBCXX_SIMD_ALWAYS_INLINE void
751  __invoke_ub([[maybe_unused]] const char* __msg, [[maybe_unused]] const _Args&... __args)
752  {
753 #ifdef _GLIBCXX_DEBUG_UB
754  __builtin_fprintf(stderr, __msg, __args...);
755  __builtin_trap();
756 #else
757  __builtin_unreachable();
758 #endif
759  }
760 
761 // }}}
762 // __assert_unreachable{{{
763 template <typename _Tp>
764  struct __assert_unreachable
765  { static_assert(!is_same_v<_Tp, _Tp>, "this should be unreachable"); };
766 
767 // }}}
768 // __size_or_zero_v {{{
769 template <typename _Tp, typename _Ap, size_t _Np = simd_size<_Tp, _Ap>::value>
770  constexpr size_t
771  __size_or_zero_dispatch(int)
772  { return _Np; }
773 
774 template <typename _Tp, typename _Ap>
775  constexpr size_t
776  __size_or_zero_dispatch(float)
777  { return 0; }
778 
779 template <typename _Tp, typename _Ap>
780  inline constexpr size_t __size_or_zero_v
781  = __size_or_zero_dispatch<_Tp, _Ap>(0);
782 
783 // }}}
784 // __div_roundup {{{
// Returns __a / __b rounded up to the next integer, i.e. the smallest n with
// n * __b >= __a. __b must not be 0.
//
// Computed as quotient plus "is there a remainder" instead of
// (__a + __b - 1) / __b, because the latter wraps around for __a close to the
// maximum of size_t and then yields a far too small result.
inline constexpr size_t
__div_roundup(size_t __a, size_t __b)
{ return __a / __b + (__a % __b != 0 ? 1 : 0); }
788 
789 // }}}
790 // _ExactBool{{{
791 class _ExactBool
792 {
793  const bool _M_data;
794 
795 public:
796  _GLIBCXX_SIMD_INTRINSIC constexpr
797  _ExactBool(bool __b) : _M_data(__b) {}
798 
799  _ExactBool(int) = delete;
800 
801  _GLIBCXX_SIMD_INTRINSIC constexpr
802  operator bool() const
803  { return _M_data; }
804 };
805 
806 // }}}
807 // __may_alias{{{
/**@internal
 * __may_alias<_Tp> names _Tp with the GNU __may_alias__ attribute applied.
 * It is the type to use as pointee of a pointer that is meant to alias
 * objects of other types (with compilers that support the attribute).
 */
template <typename _Tp>
  using __may_alias [[__gnu__::__may_alias__]] = _Tp;
815 
816 // }}}
817 // _UnsupportedBase {{{
818 // simd and simd_mask base for unsupported <_Tp, _Abi>
// Base class for simd and simd_mask with an unsupported <_Tp, _Abi>
// combination. Every special member function declared here is deleted, so
// the type can be neither created, copied, assigned nor destroyed.
struct _UnsupportedBase
{
  _UnsupportedBase() = delete;
  ~_UnsupportedBase() = delete;

  _UnsupportedBase(const _UnsupportedBase&) = delete;

  _UnsupportedBase&
  operator=(const _UnsupportedBase&) = delete;
};
826 
827 // }}}
828 // _InvalidTraits {{{
829 /**
830  * @internal
831  * Defines the implementation of __a given <_Tp, _Abi>.
832  *
833  * Implementations must ensure that only valid <_Tp, _Abi> instantiations are
834  * possible. Static assertions in the type definition do not suffice. It is
835  * important that SFINAE works.
836  */
837 struct _InvalidTraits
838 {
839  using _IsValid = false_type;
840  using _SimdBase = _UnsupportedBase;
841  using _MaskBase = _UnsupportedBase;
842 
843  static constexpr size_t _S_full_size = 0;
844  static constexpr bool _S_is_partial = false;
845 
846  static constexpr size_t _S_simd_align = 1;
847  struct _SimdImpl;
848  struct _SimdMember {};
849  struct _SimdCastType;
850 
851  static constexpr size_t _S_mask_align = 1;
852  struct _MaskImpl;
853  struct _MaskMember {};
854  struct _MaskCastType;
855 };
856 
857 // }}}
858 // _SimdTraits {{{
859 template <typename _Tp, typename _Abi, typename = void_t<>>
860  struct _SimdTraits : _InvalidTraits {};
861 
862 // }}}
863 // __private_init, __bitset_init{{{
/**
 * @internal
 * Tag type and tag object used for the private init constructor of simd and
 * simd_mask.
 */
struct _PrivateInit {};
inline constexpr _PrivateInit __private_init{};

// Second tag type/object pair, following the same pattern.
struct _BitsetInit {};
inline constexpr _BitsetInit __bitset_init{};
871 
872 // }}}
873 // __is_narrowing_conversion<_From, _To>{{{
874 template <typename _From, typename _To, bool = is_arithmetic_v<_From>,
875  bool = is_arithmetic_v<_To>>
876  struct __is_narrowing_conversion;
877 
878 // ignore "signed/unsigned mismatch" in the following trait.
879 // The implicit conversions will do the right thing here.
880 template <typename _From, typename _To>
881  struct __is_narrowing_conversion<_From, _To, true, true>
882  : public __bool_constant<(
883  __digits_v<_From> > __digits_v<_To>
884  || __finite_max_v<_From> > __finite_max_v<_To>
885  || __finite_min_v<_From> < __finite_min_v<_To>
886  || (is_signed_v<_From> && is_unsigned_v<_To>))> {};
887 
888 template <typename _Tp>
889  struct __is_narrowing_conversion<_Tp, bool, true, true>
890  : public true_type {};
891 
892 template <>
893  struct __is_narrowing_conversion<bool, bool, true, true>
894  : public false_type {};
895 
896 template <typename _Tp>
897  struct __is_narrowing_conversion<_Tp, _Tp, true, true>
898  : public false_type {};
899 
900 template <typename _From, typename _To>
901  struct __is_narrowing_conversion<_From, _To, false, true>
902  : public negation<is_convertible<_From, _To>> {};
903 
904 // }}}
905 // __converts_to_higher_integer_rank{{{
906 template <typename _From, typename _To, bool = (sizeof(_From) < sizeof(_To))>
907  struct __converts_to_higher_integer_rank : public true_type {};
908 
909 // this may fail for char -> short if sizeof(char) == sizeof(short)
910 template <typename _From, typename _To>
911  struct __converts_to_higher_integer_rank<_From, _To, false>
912  : public is_same<decltype(declval<_From>() + declval<_To>()), _To> {};
913 
914 // }}}
915 // __data(simd/simd_mask) {{{
916 template <typename _Tp, typename _Ap>
917  _GLIBCXX_SIMD_INTRINSIC constexpr const auto&
918  __data(const simd<_Tp, _Ap>& __x);
919 
920 template <typename _Tp, typename _Ap>
921  _GLIBCXX_SIMD_INTRINSIC constexpr auto&
922  __data(simd<_Tp, _Ap>& __x);
923 
924 template <typename _Tp, typename _Ap>
925  _GLIBCXX_SIMD_INTRINSIC constexpr const auto&
926  __data(const simd_mask<_Tp, _Ap>& __x);
927 
928 template <typename _Tp, typename _Ap>
929  _GLIBCXX_SIMD_INTRINSIC constexpr auto&
930  __data(simd_mask<_Tp, _Ap>& __x);
931 
932 // }}}
933 // _SimdConverter {{{
934 template <typename _FromT, typename _FromA, typename _ToT, typename _ToA,
935  typename = void>
936  struct _SimdConverter;
937 
938 template <typename _Tp, typename _Ap>
939  struct _SimdConverter<_Tp, _Ap, _Tp, _Ap, void>
940  {
941  template <typename _Up>
942  _GLIBCXX_SIMD_INTRINSIC const _Up&
943  operator()(const _Up& __x)
944  { return __x; }
945  };
946 
947 // }}}
948 // __to_value_type_or_member_type {{{
949 template <typename _V>
950  _GLIBCXX_SIMD_INTRINSIC constexpr auto
951  __to_value_type_or_member_type(const _V& __x) -> decltype(__data(__x))
952  { return __data(__x); }
953 
954 template <typename _V>
955  _GLIBCXX_SIMD_INTRINSIC constexpr const typename _V::value_type&
956  __to_value_type_or_member_type(const typename _V::value_type& __x)
957  { return __x; }
958 
959 // }}}
960 // __bool_storage_member_type{{{
// Maps a size to a storage type through its nested ::type. Only declared
// here.
template <size_t _Size>
  struct __bool_storage_member_type;

// Shorthand for typename __bool_storage_member_type<_Size>::type.
template <size_t _Size>
  using __bool_storage_member_type_t
    = typename __bool_storage_member_type<_Size>::type;
967 
968 // }}}
969 // _SimdTuple {{{
// why not tuple?
// 1. tuple gives no guarantee about the storage order, but I require storage
//    equivalent to array<_Tp, _Np>
// 2. direct access to the element type (first template argument)
// 3. enforces equal element type, only different _Abi types are allowed
template <typename _Tp, typename... _Abis>
  struct _SimdTuple;
978 
979 //}}}
980 // __fixed_size_storage_t {{{
// Maps <_Tp, _Np> to a storage type through its nested ::type. Only declared
// here.
template <typename _Tp, int _Np>
  struct __fixed_size_storage;

// Shorthand for typename __fixed_size_storage<_Tp, _Np>::type.
template <typename _Tp, int _Np>
  using __fixed_size_storage_t
    = typename __fixed_size_storage<_Tp, _Np>::type;
986 
987 // }}}
988 // _SimdWrapper fwd decl{{{
// Only declared here. The trailing parameter is a hook for constrained
// partial specializations.
template <typename _Tp, size_t _Size, typename = void>
  struct _SimdWrapper;

// _SimdWrapper with as many _Tp elements as fit into 8, 16, 32 and 64 bytes.
template <typename _Tp>
  using _SimdWrapper8 = _SimdWrapper<_Tp, 8 / sizeof(_Tp)>;

template <typename _Tp>
  using _SimdWrapper16 = _SimdWrapper<_Tp, 16 / sizeof(_Tp)>;

template <typename _Tp>
  using _SimdWrapper32 = _SimdWrapper<_Tp, 32 / sizeof(_Tp)>;

template <typename _Tp>
  using _SimdWrapper64 = _SimdWrapper<_Tp, 64 / sizeof(_Tp)>;
1000 
1001 // }}}
1002 // __is_simd_wrapper {{{
1003 template <typename _Tp>
1004  struct __is_simd_wrapper : false_type {};
1005 
1006 template <typename _Tp, size_t _Np>
1007  struct __is_simd_wrapper<_SimdWrapper<_Tp, _Np>> : true_type {};
1008 
1009 template <typename _Tp>
1010  inline constexpr bool __is_simd_wrapper_v = __is_simd_wrapper<_Tp>::value;
1011 
1012 // }}}
1013 // _BitOps {{{
1014 struct _BitOps
1015 {
1016  // _S_bit_iteration {{{
1017  template <typename _Tp, typename _Fp>
1018  static void
1019  _S_bit_iteration(_Tp __mask, _Fp&& __f)
1020  {
1021  static_assert(sizeof(_ULLong) >= sizeof(_Tp));
1022  conditional_t<sizeof(_Tp) <= sizeof(_UInt), _UInt, _ULLong> __k;
1023  if constexpr (is_convertible_v<_Tp, decltype(__k)>)
1024  __k = __mask;
1025  else
1026  __k = __mask.to_ullong();
1027  while(__k)
1028  {
1029  __f(std::__countr_zero(__k));
1030  __k &= (__k - 1);
1031  }
1032  }
1033 
1034  //}}}
1035 };
1036 
1037 //}}}
1038 // __increment, __decrement {{{
// Function object returning the pre-incremented copy of its argument.
// The argument is taken by value, so the caller's object is not modified.
template <class _Tp = void>
  struct __increment
  {
    constexpr _Tp
    operator()(_Tp __val) const
    { return ++__val; }
  };

// Transparent form (__increment<>): the operand type is deduced per call.
template <>
  struct __increment<void>
  {
    template <class _Tp>
      constexpr _Tp
      operator()(_Tp __val) const
      { return ++__val; }
  };
1051 
// Function object returning the pre-decremented copy of its argument.
// The argument is taken by value, so the caller's object is not modified.
template <class _Tp = void>
  struct __decrement
  {
    constexpr _Tp
    operator()(_Tp __val) const
    { return --__val; }
  };

// Transparent form (__decrement<>): the operand type is deduced per call.
template <>
  struct __decrement<void>
  {
    template <class _Tp>
      constexpr _Tp
      operator()(_Tp __val) const
      { return --__val; }
  };
1064 
1065 // }}}
1066 // _ValuePreserving(OrInt) {{{
1067 template <typename _From, typename _To,
1068  typename = enable_if_t<negation<
1069  __is_narrowing_conversion<__remove_cvref_t<_From>, _To>>::value>>
1070  using _ValuePreserving = _From;
1071 
1072 template <typename _From, typename _To,
1073  typename _DecayedFrom = __remove_cvref_t<_From>,
1074  typename = enable_if_t<conjunction<
1075  is_convertible<_From, _To>,
1076  disjunction<
1077  is_same<_DecayedFrom, _To>, is_same<_DecayedFrom, int>,
1078  conjunction<is_same<_DecayedFrom, _UInt>, is_unsigned<_To>>,
1079  negation<__is_narrowing_conversion<_DecayedFrom, _To>>>>::value>>
1080  using _ValuePreservingOrInt = _From;
1081 
1082 // }}}
1083 // __intrinsic_type {{{
1084 template <typename _Tp, size_t _Bytes, typename = void_t<>>
1085  struct __intrinsic_type;
1086 
1087 template <typename _Tp, size_t _Size>
1088  using __intrinsic_type_t =
1089  typename __intrinsic_type<_Tp, _Size * sizeof(_Tp)>::type;
1090 
1091 template <typename _Tp>
1092  using __intrinsic_type2_t = typename __intrinsic_type<_Tp, 2>::type;
1093 template <typename _Tp>
1094  using __intrinsic_type4_t = typename __intrinsic_type<_Tp, 4>::type;
1095 template <typename _Tp>
1096  using __intrinsic_type8_t = typename __intrinsic_type<_Tp, 8>::type;
1097 template <typename _Tp>
1098  using __intrinsic_type16_t = typename __intrinsic_type<_Tp, 16>::type;
1099 template <typename _Tp>
1100  using __intrinsic_type32_t = typename __intrinsic_type<_Tp, 32>::type;
1101 template <typename _Tp>
1102  using __intrinsic_type64_t = typename __intrinsic_type<_Tp, 64>::type;
1103 
1104 // }}}
1105 // _BitMask {{{
1106 template <size_t _Np, bool _Sanitized = false>
1107  struct _BitMask;
1108 
1109 template <size_t _Np, bool _Sanitized>
1110  struct __is_bitmask<_BitMask<_Np, _Sanitized>, void> : true_type {};
1111 
1112 template <size_t _Np>
1113  using _SanitizedBitMask = _BitMask<_Np, true>;
1114 
1115 template <size_t _Np, bool _Sanitized>
1116  struct _BitMask
1117  {
1118  static_assert(_Np > 0);
1119 
1120  static constexpr size_t _NBytes = __div_roundup(_Np, __CHAR_BIT__);
1121 
1122  using _Tp = conditional_t<_Np == 1, bool,
1123  make_unsigned_t<__int_with_sizeof_t<std::min(
1124  sizeof(_ULLong), std::__bit_ceil(_NBytes))>>>;
1125 
1126  static constexpr int _S_array_size = __div_roundup(_NBytes, sizeof(_Tp));
1127 
1128  _Tp _M_bits[_S_array_size];
1129 
1130  static constexpr int _S_unused_bits
1131  = _Np == 1 ? 0 : _S_array_size * sizeof(_Tp) * __CHAR_BIT__ - _Np;
1132 
1133  static constexpr _Tp _S_bitmask = +_Tp(~_Tp()) >> _S_unused_bits;
1134 
1135  constexpr _BitMask() noexcept = default;
1136 
1137  constexpr _BitMask(unsigned long long __x) noexcept
1138  : _M_bits{static_cast<_Tp>(__x)} {}
1139 
1140  _BitMask(bitset<_Np> __x) noexcept : _BitMask(__x.to_ullong()) {}
1141 
1142  constexpr _BitMask(const _BitMask&) noexcept = default;
1143 
1144  template <bool _RhsSanitized, typename = enable_if_t<_RhsSanitized == false
1145  && _Sanitized == true>>
1146  constexpr _BitMask(const _BitMask<_Np, _RhsSanitized>& __rhs) noexcept
1147  : _BitMask(__rhs._M_sanitized()) {}
1148 
1149  constexpr operator _SimdWrapper<bool, _Np>() const noexcept
1150  {
1151  static_assert(_S_array_size == 1);
1152  return _M_bits[0];
1153  }
1154 
1155  // precondition: is sanitized
1156  constexpr _Tp
1157  _M_to_bits() const noexcept
1158  {
1159  static_assert(_S_array_size == 1);
1160  return _M_bits[0];
1161  }
1162 
1163  // precondition: is sanitized
1164  constexpr unsigned long long
1165  to_ullong() const noexcept
1166  {
1167  static_assert(_S_array_size == 1);
1168  return _M_bits[0];
1169  }
1170 
1171  // precondition: is sanitized
1172  constexpr unsigned long
1173  to_ulong() const noexcept
1174  {
1175  static_assert(_S_array_size == 1);
1176  return _M_bits[0];
1177  }
1178 
1179  constexpr bitset<_Np>
1180  _M_to_bitset() const noexcept
1181  {
1182  static_assert(_S_array_size == 1);
1183  return _M_bits[0];
1184  }
1185 
1186  constexpr decltype(auto)
1187  _M_sanitized() const noexcept
1188  {
1189  if constexpr (_Sanitized)
1190  return *this;
1191  else if constexpr (_Np == 1)
1192  return _SanitizedBitMask<_Np>(_M_bits[0]);
1193  else
1194  {
1195  _SanitizedBitMask<_Np> __r = {};
1196  for (int __i = 0; __i < _S_array_size; ++__i)
1197  __r._M_bits[__i] = _M_bits[__i];
1198  if constexpr (_S_unused_bits > 0)
1199  __r._M_bits[_S_array_size - 1] &= _S_bitmask;
1200  return __r;
1201  }
1202  }
1203 
1204  template <size_t _Mp, bool _LSanitized>
1205  constexpr _BitMask<_Np + _Mp, _Sanitized>
1206  _M_prepend(_BitMask<_Mp, _LSanitized> __lsb) const noexcept
1207  {
1208  constexpr size_t _RN = _Np + _Mp;
1209  using _Rp = _BitMask<_RN, _Sanitized>;
1210  if constexpr (_Rp::_S_array_size == 1)
1211  {
1212  _Rp __r{{_M_bits[0]}};
1213  __r._M_bits[0] <<= _Mp;
1214  __r._M_bits[0] |= __lsb._M_sanitized()._M_bits[0];
1215  return __r;
1216  }
1217  else
1218  __assert_unreachable<_Rp>();
1219  }
1220 
1221  // Return a new _BitMask with size _NewSize while dropping _DropLsb least
1222  // significant bits. If the operation implicitly produces a sanitized bitmask,
1223  // the result type will have _Sanitized set.
1224  template <size_t _DropLsb, size_t _NewSize = _Np - _DropLsb>
1225  constexpr auto
1226  _M_extract() const noexcept
1227  {
1228  static_assert(_Np > _DropLsb);
1229  static_assert(_DropLsb + _NewSize <= sizeof(_ULLong) * __CHAR_BIT__,
1230  "not implemented for bitmasks larger than one ullong");
1231  if constexpr (_NewSize == 1)
1232  // must sanitize because the return _Tp is bool
1233  return _SanitizedBitMask<1>(_M_bits[0] & (_Tp(1) << _DropLsb));
1234  else
1235  return _BitMask<_NewSize,
1236  ((_NewSize + _DropLsb == sizeof(_Tp) * __CHAR_BIT__
1237  && _NewSize + _DropLsb <= _Np)
1238  || ((_Sanitized || _Np == sizeof(_Tp) * __CHAR_BIT__)
1239  && _NewSize + _DropLsb >= _Np))>(_M_bits[0]
1240  >> _DropLsb);
1241  }
1242 
1243  // True if all bits are set. Implicitly sanitizes if _Sanitized == false.
1244  constexpr bool
1245  all() const noexcept
1246  {
1247  if constexpr (_Np == 1)
1248  return _M_bits[0];
1249  else if constexpr (!_Sanitized)
1250  return _M_sanitized().all();
1251  else
1252  {
1253  constexpr _Tp __allbits = ~_Tp();
1254  for (int __i = 0; __i < _S_array_size - 1; ++__i)
1255  if (_M_bits[__i] != __allbits)
1256  return false;
1257  return _M_bits[_S_array_size - 1] == _S_bitmask;
1258  }
1259  }
1260 
1261  // True if at least one bit is set. Implicitly sanitizes if _Sanitized ==
1262  // false.
1263  constexpr bool
1264  any() const noexcept
1265  {
1266  if constexpr (_Np == 1)
1267  return _M_bits[0];
1268  else if constexpr (!_Sanitized)
1269  return _M_sanitized().any();
1270  else
1271  {
1272  for (int __i = 0; __i < _S_array_size - 1; ++__i)
1273  if (_M_bits[__i] != 0)
1274  return true;
1275  return _M_bits[_S_array_size - 1] != 0;
1276  }
1277  }
1278 
1279  // True if no bit is set. Implicitly sanitizes if _Sanitized == false.
1280  constexpr bool
1281  none() const noexcept
1282  {
1283  if constexpr (_Np == 1)
1284  return !_M_bits[0];
1285  else if constexpr (!_Sanitized)
1286  return _M_sanitized().none();
1287  else
1288  {
1289  for (int __i = 0; __i < _S_array_size - 1; ++__i)
1290  if (_M_bits[__i] != 0)
1291  return false;
1292  return _M_bits[_S_array_size - 1] == 0;
1293  }
1294  }
1295 
1296  // Returns the number of set bits. Implicitly sanitizes if _Sanitized ==
1297  // false.
1298  constexpr int
1299  count() const noexcept
1300  {
1301  if constexpr (_Np == 1)
1302  return _M_bits[0];
1303  else if constexpr (!_Sanitized)
1304  return _M_sanitized().none();
1305  else
1306  {
1307  int __result = __builtin_popcountll(_M_bits[0]);
1308  for (int __i = 1; __i < _S_array_size; ++__i)
1309  __result += __builtin_popcountll(_M_bits[__i]);
1310  return __result;
1311  }
1312  }
1313 
1314  // Returns the bit at offset __i as bool.
1315  constexpr bool
1316  operator[](size_t __i) const noexcept
1317  {
1318  if constexpr (_Np == 1)
1319  return _M_bits[0];
1320  else if constexpr (_S_array_size == 1)
1321  return (_M_bits[0] >> __i) & 1;
1322  else
1323  {
1324  const size_t __j = __i / (sizeof(_Tp) * __CHAR_BIT__);
1325  const size_t __shift = __i % (sizeof(_Tp) * __CHAR_BIT__);
1326  return (_M_bits[__j] >> __shift) & 1;
1327  }
1328  }
1329 
1330  template <size_t __i>
1331  constexpr bool
1332  operator[](_SizeConstant<__i>) const noexcept
1333  {
1334  static_assert(__i < _Np);
1335  constexpr size_t __j = __i / (sizeof(_Tp) * __CHAR_BIT__);
1336  constexpr size_t __shift = __i % (sizeof(_Tp) * __CHAR_BIT__);
1337  return static_cast<bool>(_M_bits[__j] & (_Tp(1) << __shift));
1338  }
1339 
1340  // Set the bit at offset __i to __x.
1341  constexpr void
1342  set(size_t __i, bool __x) noexcept
1343  {
1344  if constexpr (_Np == 1)
1345  _M_bits[0] = __x;
1346  else if constexpr (_S_array_size == 1)
1347  {
1348  _M_bits[0] &= ~_Tp(_Tp(1) << __i);
1349  _M_bits[0] |= _Tp(_Tp(__x) << __i);
1350  }
1351  else
1352  {
1353  const size_t __j = __i / (sizeof(_Tp) * __CHAR_BIT__);
1354  const size_t __shift = __i % (sizeof(_Tp) * __CHAR_BIT__);
1355  _M_bits[__j] &= ~_Tp(_Tp(1) << __shift);
1356  _M_bits[__j] |= _Tp(_Tp(__x) << __shift);
1357  }
1358  }
1359 
1360  template <size_t __i>
1361  constexpr void
1362  set(_SizeConstant<__i>, bool __x) noexcept
1363  {
1364  static_assert(__i < _Np);
1365  if constexpr (_Np == 1)
1366  _M_bits[0] = __x;
1367  else
1368  {
1369  constexpr size_t __j = __i / (sizeof(_Tp) * __CHAR_BIT__);
1370  constexpr size_t __shift = __i % (sizeof(_Tp) * __CHAR_BIT__);
1371  constexpr _Tp __mask = ~_Tp(_Tp(1) << __shift);
1372  _M_bits[__j] &= __mask;
1373  _M_bits[__j] |= _Tp(_Tp(__x) << __shift);
1374  }
1375  }
1376 
1377  // Inverts all bits. Sanitized input leads to sanitized output.
1378  constexpr _BitMask
1379  operator~() const noexcept
1380  {
1381  if constexpr (_Np == 1)
1382  return !_M_bits[0];
1383  else
1384  {
1385  _BitMask __result{};
1386  for (int __i = 0; __i < _S_array_size - 1; ++__i)
1387  __result._M_bits[__i] = ~_M_bits[__i];
1388  if constexpr (_Sanitized)
1389  __result._M_bits[_S_array_size - 1]
1390  = _M_bits[_S_array_size - 1] ^ _S_bitmask;
1391  else
1392  __result._M_bits[_S_array_size - 1] = ~_M_bits[_S_array_size - 1];
1393  return __result;
1394  }
1395  }
1396 
1397  constexpr _BitMask&
1398  operator^=(const _BitMask& __b) & noexcept
1399  {
1400  __execute_n_times<_S_array_size>(
1401  [&](auto __i) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA { _M_bits[__i] ^= __b._M_bits[__i]; });
1402  return *this;
1403  }
1404 
1405  constexpr _BitMask&
1406  operator|=(const _BitMask& __b) & noexcept
1407  {
1408  __execute_n_times<_S_array_size>(
1409  [&](auto __i) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA { _M_bits[__i] |= __b._M_bits[__i]; });
1410  return *this;
1411  }
1412 
1413  constexpr _BitMask&
1414  operator&=(const _BitMask& __b) & noexcept
1415  {
1416  __execute_n_times<_S_array_size>(
1417  [&](auto __i) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA { _M_bits[__i] &= __b._M_bits[__i]; });
1418  return *this;
1419  }
1420 
1421  friend constexpr _BitMask
1422  operator^(const _BitMask& __a, const _BitMask& __b) noexcept
1423  {
1424  _BitMask __r = __a;
1425  __r ^= __b;
1426  return __r;
1427  }
1428 
1429  friend constexpr _BitMask
1430  operator|(const _BitMask& __a, const _BitMask& __b) noexcept
1431  {
1432  _BitMask __r = __a;
1433  __r |= __b;
1434  return __r;
1435  }
1436 
1437  friend constexpr _BitMask
1438  operator&(const _BitMask& __a, const _BitMask& __b) noexcept
1439  {
1440  _BitMask __r = __a;
1441  __r &= __b;
1442  return __r;
1443  }
1444 
1445  _GLIBCXX_SIMD_INTRINSIC
1446  constexpr bool
1447  _M_is_constprop() const
1448  {
1449  if constexpr (_S_array_size == 0)
1450  return __builtin_constant_p(_M_bits[0]);
1451  else
1452  {
1453  for (int __i = 0; __i < _S_array_size; ++__i)
1454  if (!__builtin_constant_p(_M_bits[__i]))
1455  return false;
1456  return true;
1457  }
1458  }
1459  };
1460 
1461 // }}}
1462 
1463 // vvv ---- builtin vector types [[gnu::vector_size(N)]] and operations ---- vvv
1464 // __min_vector_size {{{
// Smallest vector size in bytes. For an element type _Tp this is the size of
// two elements; the <void> specialization gives a type-independent value:
// 8 when NEON is available, 16 otherwise.
template <class _Tp = void>
  static inline constexpr int __min_vector_size = 2 * sizeof(_Tp);

#if _GLIBCXX_SIMD_HAVE_NEON
template <>
  inline constexpr int __min_vector_size<void> = 8;
#else
template <>
  inline constexpr int __min_vector_size<void> = 16;
#endif
1475 
1476 // }}}
1477 // __vector_type {{{
1478 template <typename _Tp, size_t _Np, typename = void>
1479  struct __vector_type_n {};
1480 
1481 // substition failure for 0-element case
1482 template <typename _Tp>
1483  struct __vector_type_n<_Tp, 0, void> {};
1484 
1485 // special case 1-element to be _Tp itself
1486 template <typename _Tp>
1487  struct __vector_type_n<_Tp, 1, enable_if_t<__is_vectorizable_v<_Tp>>>
1488  { using type = _Tp; };
1489 
1490 // else, use GNU-style builtin vector types
1491 template <typename _Tp, size_t _Np>
1492  struct __vector_type_n<_Tp, _Np, enable_if_t<__is_vectorizable_v<_Tp> && _Np >= 2>>
1493  {
1494  static constexpr size_t _S_Np2 = std::__bit_ceil(_Np * sizeof(_Tp));
1495 
1496  static constexpr size_t _S_Bytes =
1497 #ifdef __i386__
1498  // Using [[gnu::vector_size(8)]] would wreak havoc on the FPU because
1499  // those objects are passed via MMX registers and nothing ever calls EMMS.
1500  _S_Np2 == 8 ? 16 :
1501 #endif
1502  _S_Np2 < __min_vector_size<_Tp> ? __min_vector_size<_Tp>
1503  : _S_Np2;
1504 
1505  using type [[__gnu__::__vector_size__(_S_Bytes)]] = _Tp;
1506  };
1507 
1508 template <typename _Tp, size_t _Bytes, size_t = _Bytes % sizeof(_Tp)>
1509  struct __vector_type;
1510 
1511 template <typename _Tp, size_t _Bytes>
1512  struct __vector_type<_Tp, _Bytes, 0>
1513  : __vector_type_n<_Tp, _Bytes / sizeof(_Tp)> {};
1514 
1515 template <typename _Tp, size_t _Size>
1516  using __vector_type_t = typename __vector_type_n<_Tp, _Size>::type;
1517 
1518 template <typename _Tp>
1519  using __vector_type2_t = typename __vector_type<_Tp, 2>::type;
1520 template <typename _Tp>
1521  using __vector_type4_t = typename __vector_type<_Tp, 4>::type;
1522 template <typename _Tp>
1523  using __vector_type8_t = typename __vector_type<_Tp, 8>::type;
1524 template <typename _Tp>
1525  using __vector_type16_t = typename __vector_type<_Tp, 16>::type;
1526 template <typename _Tp>
1527  using __vector_type32_t = typename __vector_type<_Tp, 32>::type;
1528 template <typename _Tp>
1529  using __vector_type64_t = typename __vector_type<_Tp, 64>::type;
1530 
1531 // }}}
1532 // __is_vector_type {{{
1533 template <typename _Tp, typename = void_t<>>
1534  struct __is_vector_type : false_type {};
1535 
1536 template <typename _Tp>
1537  struct __is_vector_type<
1538  _Tp, void_t<typename __vector_type<
1539  remove_reference_t<decltype(declval<_Tp>()[0])>, sizeof(_Tp)>::type>>
1540  : is_same<_Tp, typename __vector_type<
1541  remove_reference_t<decltype(declval<_Tp>()[0])>,
1542  sizeof(_Tp)>::type> {};
1543 
1544 template <typename _Tp>
1545  inline constexpr bool __is_vector_type_v = __is_vector_type<_Tp>::value;
1546 
1547 // }}}
1548 // __is_intrinsic_type {{{
1549 #if _GLIBCXX_SIMD_HAVE_SSE_ABI
1550 template <typename _Tp>
1551  using __is_intrinsic_type = __is_vector_type<_Tp>;
1552 #else // not SSE (x86)
1553 template <typename _Tp, typename = void_t<>>
1554  struct __is_intrinsic_type : false_type {};
1555 
1556 template <typename _Tp>
1557  struct __is_intrinsic_type<
1558  _Tp, void_t<typename __intrinsic_type<
1559  remove_reference_t<decltype(declval<_Tp>()[0])>, sizeof(_Tp)>::type>>
1560  : is_same<_Tp, typename __intrinsic_type<
1561  remove_reference_t<decltype(declval<_Tp>()[0])>,
1562  sizeof(_Tp)>::type> {};
1563 #endif
1564 
1565 template <typename _Tp>
1566  inline constexpr bool __is_intrinsic_type_v = __is_intrinsic_type<_Tp>::value;
1567 
1568 // }}}
1569 // _VectorTraits{{{
1570 template <typename _Tp, typename = void_t<>>
1571  struct _VectorTraitsImpl;
1572 
1573 template <typename _Tp>
1574  struct _VectorTraitsImpl<_Tp, enable_if_t<__is_vector_type_v<_Tp>
1575  || __is_intrinsic_type_v<_Tp>>>
1576  {
1577  using type = _Tp;
1578  using value_type = remove_reference_t<decltype(declval<_Tp>()[0])>;
1579  static constexpr int _S_full_size = sizeof(_Tp) / sizeof(value_type);
1580  using _Wrapper = _SimdWrapper<value_type, _S_full_size>;
1581  template <typename _Up, int _W = _S_full_size>
1582  static constexpr bool _S_is
1583  = is_same_v<value_type, _Up> && _W == _S_full_size;
1584  };
1585 
1586 template <typename _Tp, size_t _Np>
1587  struct _VectorTraitsImpl<_SimdWrapper<_Tp, _Np>,
1588  void_t<__vector_type_t<_Tp, _Np>>>
1589  {
1590  using type = __vector_type_t<_Tp, _Np>;
1591  using value_type = _Tp;
1592  static constexpr int _S_full_size = sizeof(type) / sizeof(value_type);
1593  using _Wrapper = _SimdWrapper<_Tp, _Np>;
1594  static constexpr bool _S_is_partial = (_Np == _S_full_size);
1595  static constexpr int _S_partial_width = _Np;
1596  template <typename _Up, int _W = _S_full_size>
1597  static constexpr bool _S_is
1598  = is_same_v<value_type, _Up>&& _W == _S_full_size;
1599  };
1600 
1601 template <typename _Tp, typename = typename _VectorTraitsImpl<_Tp>::type>
1602  using _VectorTraits = _VectorTraitsImpl<_Tp>;
1603 
1604 // }}}
1605 // __as_vector{{{
1606 template <typename _V>
1607  _GLIBCXX_SIMD_INTRINSIC constexpr auto
1608  __as_vector(_V __x)
1609  {
1610  if constexpr (__is_vector_type_v<_V>)
1611  return __x;
1612  else if constexpr (is_simd<_V>::value || is_simd_mask<_V>::value)
1613  return __data(__x)._M_data;
1614  else if constexpr (__is_vectorizable_v<_V>)
1615  return __vector_type_t<_V, 2>{__x};
1616  else
1617  return __x._M_data;
1618  }
1619 
1620 // }}}
1621 // __as_wrapper{{{
1622 template <size_t _Np = 0, typename _V>
1623  _GLIBCXX_SIMD_INTRINSIC constexpr auto
1624  __as_wrapper(_V __x)
1625  {
1626  if constexpr (__is_vector_type_v<_V>)
1627  return _SimdWrapper<typename _VectorTraits<_V>::value_type,
1628  (_Np > 0 ? _Np : _VectorTraits<_V>::_S_full_size)>(__x);
1629  else if constexpr (is_simd<_V>::value || is_simd_mask<_V>::value)
1630  {
1631  static_assert(_V::size() == _Np);
1632  return __data(__x);
1633  }
1634  else
1635  {
1636  static_assert(_V::_S_size == _Np);
1637  return __x;
1638  }
1639  }
1640 
1641 // }}}
1642 // __intrin_bitcast{{{
1643 template <typename _To, typename _From>
1644  _GLIBCXX_SIMD_INTRINSIC constexpr _To
1645  __intrin_bitcast(_From __v)
1646  {
1647  static_assert((__is_vector_type_v<_From> || __is_intrinsic_type_v<_From>)
1648  && (__is_vector_type_v<_To> || __is_intrinsic_type_v<_To>));
1649  if constexpr (sizeof(_To) == sizeof(_From))
1650  return reinterpret_cast<_To>(__v);
1651  else if constexpr (sizeof(_From) > sizeof(_To))
1652  if constexpr (sizeof(_To) >= 16)
1653  return reinterpret_cast<const __may_alias<_To>&>(__v);
1654  else
1655  {
1656  _To __r;
1657  __builtin_memcpy(&__r, &__v, sizeof(_To));
1658  return __r;
1659  }
1660 #if _GLIBCXX_SIMD_X86INTRIN && !defined __clang__
1661  else if constexpr (__have_avx && sizeof(_From) == 16 && sizeof(_To) == 32)
1662  return reinterpret_cast<_To>(__builtin_ia32_ps256_ps(
1663  reinterpret_cast<__vector_type_t<float, 4>>(__v)));
1664  else if constexpr (__have_avx512f && sizeof(_From) == 16
1665  && sizeof(_To) == 64)
1666  return reinterpret_cast<_To>(__builtin_ia32_ps512_ps(
1667  reinterpret_cast<__vector_type_t<float, 4>>(__v)));
1668  else if constexpr (__have_avx512f && sizeof(_From) == 32
1669  && sizeof(_To) == 64)
1670  return reinterpret_cast<_To>(__builtin_ia32_ps512_256ps(
1671  reinterpret_cast<__vector_type_t<float, 8>>(__v)));
1672 #endif // _GLIBCXX_SIMD_X86INTRIN
1673  else if constexpr (sizeof(__v) <= 8)
1674  return reinterpret_cast<_To>(
1675  __vector_type_t<__int_for_sizeof_t<_From>, sizeof(_To) / sizeof(_From)>{
1676  reinterpret_cast<__int_for_sizeof_t<_From>>(__v)});
1677  else
1678  {
1679  static_assert(sizeof(_To) > sizeof(_From));
1680  _To __r = {};
1681  __builtin_memcpy(&__r, &__v, sizeof(_From));
1682  return __r;
1683  }
1684  }
1685 
1686 // }}}
1687 // __vector_bitcast{{{
1688 template <typename _To, size_t _NN = 0, typename _From,
1689  typename _FromVT = _VectorTraits<_From>,
1690  size_t _Np = _NN == 0 ? sizeof(_From) / sizeof(_To) : _NN>
1691  _GLIBCXX_SIMD_INTRINSIC constexpr __vector_type_t<_To, _Np>
1692  __vector_bitcast(_From __x)
1693  {
1694  using _R = __vector_type_t<_To, _Np>;
1695  return __intrin_bitcast<_R>(__x);
1696  }
1697 
1698 template <typename _To, size_t _NN = 0, typename _Tp, size_t _Nx,
1699  size_t _Np
1700  = _NN == 0 ? sizeof(_SimdWrapper<_Tp, _Nx>) / sizeof(_To) : _NN>
1701  _GLIBCXX_SIMD_INTRINSIC constexpr __vector_type_t<_To, _Np>
1702  __vector_bitcast(const _SimdWrapper<_Tp, _Nx>& __x)
1703  {
1704  static_assert(_Np > 1);
1705  return __intrin_bitcast<__vector_type_t<_To, _Np>>(__x._M_data);
1706  }
1707 
1708 // }}}
1709 // __convert_x86 declarations {{{
#ifdef _GLIBCXX_SIMD_WORKAROUND_PR85048
// Declarations of the x86 conversion helpers taking 1, 2, 4, 8, or 16 input
// vectors of the same type _Tp and returning _To. Definitions are not part of
// this section.
template <class _To, class _Tp, class _TVT = _VectorTraits<_Tp>>
  _To __convert_x86(_Tp);

template <class _To, class _Tp, class _TVT = _VectorTraits<_Tp>>
  _To __convert_x86(_Tp, _Tp);

template <class _To, class _Tp, class _TVT = _VectorTraits<_Tp>>
  _To __convert_x86(_Tp, _Tp, _Tp, _Tp);

template <class _To, class _Tp, class _TVT = _VectorTraits<_Tp>>
  _To __convert_x86(_Tp, _Tp, _Tp, _Tp,
                    _Tp, _Tp, _Tp, _Tp);

template <class _To, class _Tp, class _TVT = _VectorTraits<_Tp>>
  _To __convert_x86(_Tp, _Tp, _Tp, _Tp,
                    _Tp, _Tp, _Tp, _Tp,
                    _Tp, _Tp, _Tp, _Tp,
                    _Tp, _Tp, _Tp, _Tp);
#endif // _GLIBCXX_SIMD_WORKAROUND_PR85048
1727 
1728 //}}}
1729 // __bit_cast {{{
1730 template <typename _To, typename _From>
1731  _GLIBCXX_SIMD_INTRINSIC constexpr _To
1732  __bit_cast(const _From __x)
1733  {
1734 #if __has_builtin(__builtin_bit_cast)
1735  return __builtin_bit_cast(_To, __x);
1736 #else
1737  static_assert(sizeof(_To) == sizeof(_From));
1738  constexpr bool __to_is_vectorizable
1739  = is_arithmetic_v<_To> || is_enum_v<_To>;
1740  constexpr bool __from_is_vectorizable
1741  = is_arithmetic_v<_From> || is_enum_v<_From>;
1742  if constexpr (__is_vector_type_v<_To> && __is_vector_type_v<_From>)
1743  return reinterpret_cast<_To>(__x);
1744  else if constexpr (__is_vector_type_v<_To> && __from_is_vectorizable)
1745  {
1746  using _FV [[gnu::vector_size(sizeof(_From))]] = _From;
1747  return reinterpret_cast<_To>(_FV{__x});
1748  }
1749  else if constexpr (__to_is_vectorizable && __from_is_vectorizable)
1750  {
1751  using _TV [[gnu::vector_size(sizeof(_To))]] = _To;
1752  using _FV [[gnu::vector_size(sizeof(_From))]] = _From;
1753  return reinterpret_cast<_TV>(_FV{__x})[0];
1754  }
1755  else if constexpr (__to_is_vectorizable && __is_vector_type_v<_From>)
1756  {
1757  using _TV [[gnu::vector_size(sizeof(_To))]] = _To;
1758  return reinterpret_cast<_TV>(__x)[0];
1759  }
1760  else
1761  {
1762  _To __r;
1763  __builtin_memcpy(reinterpret_cast<char*>(&__r),
1764  reinterpret_cast<const char*>(&__x), sizeof(_To));
1765  return __r;
1766  }
1767 #endif
1768  }
1769 
1770 // }}}
1771 // __to_intrin {{{
1772 template <typename _Tp, typename _TVT = _VectorTraits<_Tp>,
1773  typename _R = __intrinsic_type_t<typename _TVT::value_type, _TVT::_S_full_size>>
1774  _GLIBCXX_SIMD_INTRINSIC constexpr _R
1775  __to_intrin(_Tp __x)
1776  {
1777  static_assert(sizeof(__x) <= sizeof(_R),
1778  "__to_intrin may never drop values off the end");
1779  if constexpr (sizeof(__x) == sizeof(_R))
1780  return reinterpret_cast<_R>(__as_vector(__x));
1781  else
1782  {
1783  using _Up = __int_for_sizeof_t<_Tp>;
1784  return reinterpret_cast<_R>(
1785  __vector_type_t<_Up, sizeof(_R) / sizeof(_Up)>{__bit_cast<_Up>(__x)});
1786  }
1787  }
1788 
1789 // }}}
1790 // __make_vector{{{
1791 template <typename _Tp, typename... _Args>
1792  _GLIBCXX_SIMD_INTRINSIC constexpr __vector_type_t<_Tp, sizeof...(_Args)>
1793  __make_vector(const _Args&... __args)
1794  { return __vector_type_t<_Tp, sizeof...(_Args)>{static_cast<_Tp>(__args)...}; }
1795 
1796 // }}}
1797 // __vector_broadcast{{{
1798 template <size_t _Np, typename _Tp, size_t... _I>
1799  _GLIBCXX_SIMD_INTRINSIC constexpr __vector_type_t<_Tp, _Np>
1800  __vector_broadcast_impl(_Tp __x, index_sequence<_I...>)
1801  { return __vector_type_t<_Tp, _Np>{((void)_I, __x)...}; }
1802 
1803 template <size_t _Np, typename _Tp>
1804  _GLIBCXX_SIMD_INTRINSIC constexpr __vector_type_t<_Tp, _Np>
1805  __vector_broadcast(_Tp __x)
1806  { return __vector_broadcast_impl<_Np, _Tp>(__x, make_index_sequence<_Np>()); }
1807 
1808 // }}}
1809 // __generate_vector{{{
1810  template <typename _Tp, size_t _Np, typename _Gp, size_t... _I>
1811  _GLIBCXX_SIMD_INTRINSIC constexpr __vector_type_t<_Tp, _Np>
1812  __generate_vector_impl(_Gp&& __gen, index_sequence<_I...>)
1813  { return __vector_type_t<_Tp, _Np>{ static_cast<_Tp>(__gen(_SizeConstant<_I>()))...}; }
1814 
1815 template <typename _V, typename _VVT = _VectorTraits<_V>, typename _Gp>
1816  _GLIBCXX_SIMD_INTRINSIC constexpr _V
1817  __generate_vector(_Gp&& __gen)
1818  {
1819  if constexpr (__is_vector_type_v<_V>)
1820  return __generate_vector_impl<typename _VVT::value_type,
1821  _VVT::_S_full_size>(
1822  static_cast<_Gp&&>(__gen), make_index_sequence<_VVT::_S_full_size>());
1823  else
1824  return __generate_vector_impl<typename _VVT::value_type,
1825  _VVT::_S_partial_width>(
1826  static_cast<_Gp&&>(__gen),
1827  make_index_sequence<_VVT::_S_partial_width>());
1828  }
1829 
1830 template <typename _Tp, size_t _Np, typename _Gp>
1831  _GLIBCXX_SIMD_INTRINSIC constexpr __vector_type_t<_Tp, _Np>
1832  __generate_vector(_Gp&& __gen)
1833  {
1834  return __generate_vector_impl<_Tp, _Np>(static_cast<_Gp&&>(__gen),
1835  make_index_sequence<_Np>());
1836  }
1837 
1838 // }}}
1839 // __xor{{{
1840 template <typename _TW>
1841  _GLIBCXX_SIMD_INTRINSIC constexpr _TW
1842  __xor(_TW __a, _TW __b) noexcept
1843  {
1844  if constexpr (__is_vector_type_v<_TW> || __is_simd_wrapper_v<_TW>)
1845  {
1846  using _Tp = typename conditional_t<__is_simd_wrapper_v<_TW>, _TW,
1847  _VectorTraitsImpl<_TW>>::value_type;
1848  if constexpr (is_floating_point_v<_Tp>)
1849  {
1850  using _Ip = make_unsigned_t<__int_for_sizeof_t<_Tp>>;
1851  return __vector_bitcast<_Tp>(__vector_bitcast<_Ip>(__a)
1852  ^ __vector_bitcast<_Ip>(__b));
1853  }
1854  else if constexpr (__is_vector_type_v<_TW>)
1855  return __a ^ __b;
1856  else
1857  return __a._M_data ^ __b._M_data;
1858  }
1859  else
1860  return __a ^ __b;
1861  }
1862 
1863 // }}}
1864 // __or{{{
1865 template <typename _TW>
1866  _GLIBCXX_SIMD_INTRINSIC constexpr _TW
1867  __or(_TW __a, _TW __b) noexcept
1868  {
1869  if constexpr (__is_vector_type_v<_TW> || __is_simd_wrapper_v<_TW>)
1870  {
1871  using _Tp = typename conditional_t<__is_simd_wrapper_v<_TW>, _TW,
1872  _VectorTraitsImpl<_TW>>::value_type;
1873  if constexpr (is_floating_point_v<_Tp>)
1874  {
1875  using _Ip = make_unsigned_t<__int_for_sizeof_t<_Tp>>;
1876  return __vector_bitcast<_Tp>(__vector_bitcast<_Ip>(__a)
1877  | __vector_bitcast<_Ip>(__b));
1878  }
1879  else if constexpr (__is_vector_type_v<_TW>)
1880  return __a | __b;
1881  else
1882  return __a._M_data | __b._M_data;
1883  }
1884  else
1885  return __a | __b;
1886  }
1887 
1888 // }}}
1889 // __and{{{
1890 template <typename _TW>
1891  _GLIBCXX_SIMD_INTRINSIC constexpr _TW
1892  __and(_TW __a, _TW __b) noexcept
1893  {
1894  if constexpr (__is_vector_type_v<_TW> || __is_simd_wrapper_v<_TW>)
1895  {
1896  using _Tp = typename conditional_t<__is_simd_wrapper_v<_TW>, _TW,
1897  _VectorTraitsImpl<_TW>>::value_type;
1898  if constexpr (is_floating_point_v<_Tp>)
1899  {
1900  using _Ip = make_unsigned_t<__int_for_sizeof_t<_Tp>>;
1901  return __vector_bitcast<_Tp>(__vector_bitcast<_Ip>(__a)
1902  & __vector_bitcast<_Ip>(__b));
1903  }
1904  else if constexpr (__is_vector_type_v<_TW>)
1905  return __a & __b;
1906  else
1907  return __a._M_data & __b._M_data;
1908  }
1909  else
1910  return __a & __b;
1911  }
1912 
1913 // }}}
1914 // __andnot{{{
#if _GLIBCXX_SIMD_X86INTRIN && !defined __clang__
// Overload set dispatching an and-not of two x86 vectors to the builtin or
// intrinsic matching the operand type (128-, 256-, and 512-bit float, double,
// and 64-bit integer vectors).
static constexpr struct
{
  // 128-bit operands
  _GLIBCXX_SIMD_INTRINSIC __v4sf
  operator()(__v4sf __lhs, __v4sf __rhs) const noexcept
  { return __builtin_ia32_andnps(__lhs, __rhs); }

  _GLIBCXX_SIMD_INTRINSIC __v2df
  operator()(__v2df __lhs, __v2df __rhs) const noexcept
  { return __builtin_ia32_andnpd(__lhs, __rhs); }

  _GLIBCXX_SIMD_INTRINSIC __v2di
  operator()(__v2di __lhs, __v2di __rhs) const noexcept
  { return __builtin_ia32_pandn128(__lhs, __rhs); }

  // 256-bit operands
  _GLIBCXX_SIMD_INTRINSIC __v8sf
  operator()(__v8sf __lhs, __v8sf __rhs) const noexcept
  { return __builtin_ia32_andnps256(__lhs, __rhs); }

  _GLIBCXX_SIMD_INTRINSIC __v4df
  operator()(__v4df __lhs, __v4df __rhs) const noexcept
  { return __builtin_ia32_andnpd256(__lhs, __rhs); }

  _GLIBCXX_SIMD_INTRINSIC __v4di
  operator()(__v4di __lhs, __v4di __rhs) const noexcept
  {
    if constexpr (__have_avx2)
      return __builtin_ia32_andnotsi256(__lhs, __rhs);
    else
      // without AVX2, go through the double-precision builtin instead
      return reinterpret_cast<__v4di>(
        __builtin_ia32_andnpd256(reinterpret_cast<__v4df>(__lhs),
                                 reinterpret_cast<__v4df>(__rhs)));
  }

  // 512-bit operands
  _GLIBCXX_SIMD_INTRINSIC __v16sf
  operator()(__v16sf __lhs, __v16sf __rhs) const noexcept
  {
    if constexpr (__have_avx512dq)
      return _mm512_andnot_ps(__lhs, __rhs);
    else
      // without AVX512DQ, go through the integer intrinsic instead
      return reinterpret_cast<__v16sf>(
        _mm512_andnot_si512(reinterpret_cast<__v8di>(__lhs),
                            reinterpret_cast<__v8di>(__rhs)));
  }

  _GLIBCXX_SIMD_INTRINSIC __v8df
  operator()(__v8df __lhs, __v8df __rhs) const noexcept
  {
    if constexpr (__have_avx512dq)
      return _mm512_andnot_pd(__lhs, __rhs);
    else
      // without AVX512DQ, go through the integer intrinsic instead
      return reinterpret_cast<__v8df>(
        _mm512_andnot_si512(reinterpret_cast<__v8di>(__lhs),
                            reinterpret_cast<__v8di>(__rhs)));
  }

  _GLIBCXX_SIMD_INTRINSIC __v8di
  operator()(__v8di __lhs, __v8di __rhs) const noexcept
  { return _mm512_andnot_si512(__lhs, __rhs); }
} _S_x86_andnot;
#endif // _GLIBCXX_SIMD_X86INTRIN && !__clang__
1976 
1977 template <typename _TW>
1978  _GLIBCXX_SIMD_INTRINSIC constexpr _TW
1979  __andnot(_TW __a, _TW __b) noexcept
1980  {
1981  if constexpr (__is_vector_type_v<_TW> || __is_simd_wrapper_v<_TW>)
1982  {
1983  using _TVT = conditional_t<__is_simd_wrapper_v<_TW>, _TW,
1984  _VectorTraitsImpl<_TW>>;
1985  using _Tp = typename _TVT::value_type;
1986 #if _GLIBCXX_SIMD_X86INTRIN && !defined __clang__
1987  if constexpr (sizeof(_TW) >= 16)
1988  {
1989  const auto __ai = __to_intrin(__a);
1990  const auto __bi = __to_intrin(__b);
1991  if (!__builtin_is_constant_evaluated()
1992  && !(__builtin_constant_p(__ai) && __builtin_constant_p(__bi)))
1993  {
1994  const auto __r = _S_x86_andnot(__ai, __bi);
1995  if constexpr (is_convertible_v<decltype(__r), _TW>)
1996  return __r;
1997  else
1998  return reinterpret_cast<typename _TVT::type>(__r);
1999  }
2000  }
2001 #endif // _GLIBCXX_SIMD_X86INTRIN
2002  using _Ip = make_unsigned_t<__int_for_sizeof_t<_Tp>>;
2003  return __vector_bitcast<_Tp>(~__vector_bitcast<_Ip>(__a)
2004  & __vector_bitcast<_Ip>(__b));
2005  }
2006  else
2007  return ~__a & __b;
2008  }
2009 
2010 // }}}
2011 // __not{{{
2012 template <typename _Tp, typename _TVT = _VectorTraits<_Tp>>
2013  _GLIBCXX_SIMD_INTRINSIC constexpr _Tp
2014  __not(_Tp __a) noexcept
2015  {
2016  if constexpr (is_floating_point_v<typename _TVT::value_type>)
2017  return reinterpret_cast<typename _TVT::type>(
2018  ~__vector_bitcast<unsigned>(__a));
2019  else
2020  return ~__a;
2021  }
2022 
2023 // }}}
2024 // __concat{{{
2025 template <typename _Tp, typename _TVT = _VectorTraits<_Tp>,
2026  typename _R = __vector_type_t<typename _TVT::value_type, _TVT::_S_full_size * 2>>
2027  constexpr _R
2028  __concat(_Tp a_, _Tp b_)
2029  {
2030 #ifdef _GLIBCXX_SIMD_WORKAROUND_XXX_1
2031  using _W
2032  = conditional_t<is_floating_point_v<typename _TVT::value_type>, double,
2033  conditional_t<(sizeof(_Tp) >= 2 * sizeof(long long)),
2034  long long, typename _TVT::value_type>>;
2035  constexpr int input_width = sizeof(_Tp) / sizeof(_W);
2036  const auto __a = __vector_bitcast<_W>(a_);
2037  const auto __b = __vector_bitcast<_W>(b_);
2038  using _Up = __vector_type_t<_W, sizeof(_R) / sizeof(_W)>;
2039 #else
2040  constexpr int input_width = _TVT::_S_full_size;
2041  const _Tp& __a = a_;
2042  const _Tp& __b = b_;
2043  using _Up = _R;
2044 #endif
2045  if constexpr (input_width == 2)
2046  return reinterpret_cast<_R>(_Up{__a[0], __a[1], __b[0], __b[1]});
2047  else if constexpr (input_width == 4)
2048  return reinterpret_cast<_R>(
2049  _Up{__a[0], __a[1], __a[2], __a[3], __b[0], __b[1], __b[2], __b[3]});
2050  else if constexpr (input_width == 8)
2051  return reinterpret_cast<_R>(
2052  _Up{__a[0], __a[1], __a[2], __a[3], __a[4], __a[5], __a[6], __a[7],
2053  __b[0], __b[1], __b[2], __b[3], __b[4], __b[5], __b[6], __b[7]});
2054  else if constexpr (input_width == 16)
2055  return reinterpret_cast<_R>(
2056  _Up{__a[0], __a[1], __a[2], __a[3], __a[4], __a[5], __a[6],
2057  __a[7], __a[8], __a[9], __a[10], __a[11], __a[12], __a[13],
2058  __a[14], __a[15], __b[0], __b[1], __b[2], __b[3], __b[4],
2059  __b[5], __b[6], __b[7], __b[8], __b[9], __b[10], __b[11],
2060  __b[12], __b[13], __b[14], __b[15]});
2061  else if constexpr (input_width == 32)
2062  return reinterpret_cast<_R>(
2063  _Up{__a[0], __a[1], __a[2], __a[3], __a[4], __a[5], __a[6],
2064  __a[7], __a[8], __a[9], __a[10], __a[11], __a[12], __a[13],
2065  __a[14], __a[15], __a[16], __a[17], __a[18], __a[19], __a[20],
2066  __a[21], __a[22], __a[23], __a[24], __a[25], __a[26], __a[27],
2067  __a[28], __a[29], __a[30], __a[31], __b[0], __b[1], __b[2],
2068  __b[3], __b[4], __b[5], __b[6], __b[7], __b[8], __b[9],
2069  __b[10], __b[11], __b[12], __b[13], __b[14], __b[15], __b[16],
2070  __b[17], __b[18], __b[19], __b[20], __b[21], __b[22], __b[23],
2071  __b[24], __b[25], __b[26], __b[27], __b[28], __b[29], __b[30],
2072  __b[31]});
2073  }
2074 
2075 // }}}
2076 // __zero_extend {{{
2077 template <typename _Tp, typename _TVT = _VectorTraits<_Tp>>
2078  struct _ZeroExtendProxy
2079  {
2080  using value_type = typename _TVT::value_type;
2081  static constexpr size_t _Np = _TVT::_S_full_size;
2082  const _Tp __x;
2083 
2084  template <typename _To, typename _ToVT = _VectorTraits<_To>,
2085  typename
2086  = enable_if_t<is_same_v<typename _ToVT::value_type, value_type>>>
2087  _GLIBCXX_SIMD_INTRINSIC operator _To() const
2088  {
2089  constexpr size_t _ToN = _ToVT::_S_full_size;
2090  if constexpr (_ToN == _Np)
2091  return __x;
2092  else if constexpr (_ToN == 2 * _Np)
2093  {
2094 #ifdef _GLIBCXX_SIMD_WORKAROUND_XXX_3
2095  if constexpr (__have_avx && _TVT::template _S_is<float, 4>)
2096  return __vector_bitcast<value_type>(
2097  _mm256_insertf128_ps(__m256(), __x, 0));
2098  else if constexpr (__have_avx && _TVT::template _S_is<double, 2>)
2099  return __vector_bitcast<value_type>(
2100  _mm256_insertf128_pd(__m256d(), __x, 0));
2101  else if constexpr (__have_avx2 && _Np * sizeof(value_type) == 16)
2102  return __vector_bitcast<value_type>(
2103  _mm256_insertf128_si256(__m256i(), __to_intrin(__x), 0));
2104  else if constexpr (__have_avx512f && _TVT::template _S_is<float, 8>)
2105  {
2106  if constexpr (__have_avx512dq)
2107  return __vector_bitcast<value_type>(
2108  _mm512_insertf32x8(__m512(), __x, 0));
2109  else
2110  return reinterpret_cast<__m512>(
2111  _mm512_insertf64x4(__m512d(),
2112  reinterpret_cast<__m256d>(__x), 0));
2113  }
2114  else if constexpr (__have_avx512f
2115  && _TVT::template _S_is<double, 4>)
2116  return __vector_bitcast<value_type>(
2117  _mm512_insertf64x4(__m512d(), __x, 0));
2118  else if constexpr (__have_avx512f && _Np * sizeof(value_type) == 32)
2119  return __vector_bitcast<value_type>(
2120  _mm512_inserti64x4(__m512i(), __to_intrin(__x), 0));
2121 #endif
2122  return __concat(__x, _Tp());
2123  }
2124  else if constexpr (_ToN == 4 * _Np)
2125  {
2126 #ifdef _GLIBCXX_SIMD_WORKAROUND_XXX_3
2127  if constexpr (__have_avx512dq && _TVT::template _S_is<double, 2>)
2128  {
2129  return __vector_bitcast<value_type>(
2130  _mm512_insertf64x2(__m512d(), __x, 0));
2131  }
2132  else if constexpr (__have_avx512f
2133  && is_floating_point_v<value_type>)
2134  {
2135  return __vector_bitcast<value_type>(
2136  _mm512_insertf32x4(__m512(), reinterpret_cast<__m128>(__x),
2137  0));
2138  }
2139  else if constexpr (__have_avx512f && _Np * sizeof(value_type) == 16)
2140  {
2141  return __vector_bitcast<value_type>(
2142  _mm512_inserti32x4(__m512i(), __to_intrin(__x), 0));
2143  }
2144 #endif
2145  return __concat(__concat(__x, _Tp()),
2146  __vector_type_t<value_type, _Np * 2>());
2147  }
2148  else if constexpr (_ToN == 8 * _Np)
2149  return __concat(operator __vector_type_t<value_type, _Np * 4>(),
2150  __vector_type_t<value_type, _Np * 4>());
2151  else if constexpr (_ToN == 16 * _Np)
2152  return __concat(operator __vector_type_t<value_type, _Np * 8>(),
2153  __vector_type_t<value_type, _Np * 8>());
2154  else
2155  __assert_unreachable<_Tp>();
2156  }
2157  };
2158 
2159 template <typename _Tp, typename _TVT = _VectorTraits<_Tp>>
2160  _GLIBCXX_SIMD_INTRINSIC _ZeroExtendProxy<_Tp, _TVT>
2161  __zero_extend(_Tp __x)
2162  { return {__x}; }
2163 
2164 // }}}
2165 // __extract<_Np, By>{{{
2166 template <int _Offset,
2167  int _SplitBy,
2168  typename _Tp,
2169  typename _TVT = _VectorTraits<_Tp>,
2170  typename _R = __vector_type_t<typename _TVT::value_type, _TVT::_S_full_size / _SplitBy>>
2171  _GLIBCXX_SIMD_INTRINSIC constexpr _R
2172  __extract(_Tp __in)
2173  {
2174  using value_type = typename _TVT::value_type;
2175 #if _GLIBCXX_SIMD_X86INTRIN // {{{
2176  if constexpr (sizeof(_Tp) == 64 && _SplitBy == 4 && _Offset > 0)
2177  {
2178  if constexpr (__have_avx512dq && is_same_v<double, value_type>)
2179  return _mm512_extractf64x2_pd(__to_intrin(__in), _Offset);
2180  else if constexpr (is_floating_point_v<value_type>)
2181  return __vector_bitcast<value_type>(
2182  _mm512_extractf32x4_ps(__intrin_bitcast<__m512>(__in), _Offset));
2183  else
2184  return reinterpret_cast<_R>(
2185  _mm512_extracti32x4_epi32(__intrin_bitcast<__m512i>(__in),
2186  _Offset));
2187  }
2188  else
2189 #endif // _GLIBCXX_SIMD_X86INTRIN }}}
2190  {
2191 #ifdef _GLIBCXX_SIMD_WORKAROUND_XXX_1
2192  using _W = conditional_t<
2193  is_floating_point_v<value_type>, double,
2194  conditional_t<(sizeof(_R) >= 16), long long, value_type>>;
2195  static_assert(sizeof(_R) % sizeof(_W) == 0);
2196  constexpr int __return_width = sizeof(_R) / sizeof(_W);
2197  using _Up = __vector_type_t<_W, __return_width>;
2198  const auto __x = __vector_bitcast<_W>(__in);
2199 #else
2200  constexpr int __return_width = _TVT::_S_full_size / _SplitBy;
2201  using _Up = _R;
2202  const __vector_type_t<value_type, _TVT::_S_full_size>& __x
2203  = __in; // only needed for _Tp = _SimdWrapper<value_type, _Np>
2204 #endif
2205  constexpr int _O = _Offset * __return_width;
2206  return __call_with_subscripts<__return_width, _O>(
2207  __x, [](auto... __entries) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
2208  return reinterpret_cast<_R>(_Up{__entries...});
2209  });
2210  }
2211  }
2212 
2213 // }}}
2214 // __lo/__hi64[z]{{{
2215 template <typename _Tp,
2216  typename _R = __vector_type8_t<typename _VectorTraits<_Tp>::value_type>>
2217  _GLIBCXX_SIMD_INTRINSIC constexpr _R
2218  __lo64(_Tp __x)
2219  {
2220  _R __r{};
2221  __builtin_memcpy(&__r, &__x, 8);
2222  return __r;
2223  }
2224 
2225 template <typename _Tp,
2226  typename _R = __vector_type8_t<typename _VectorTraits<_Tp>::value_type>>
2227  _GLIBCXX_SIMD_INTRINSIC constexpr _R
2228  __hi64(_Tp __x)
2229  {
2230  static_assert(sizeof(_Tp) == 16, "use __hi64z if you meant it");
2231  _R __r{};
2232  __builtin_memcpy(&__r, reinterpret_cast<const char*>(&__x) + 8, 8);
2233  return __r;
2234  }
2235 
2236 template <typename _Tp,
2237  typename _R = __vector_type8_t<typename _VectorTraits<_Tp>::value_type>>
2238  _GLIBCXX_SIMD_INTRINSIC constexpr _R
2239  __hi64z([[maybe_unused]] _Tp __x)
2240  {
2241  _R __r{};
2242  if constexpr (sizeof(_Tp) == 16)
2243  __builtin_memcpy(&__r, reinterpret_cast<const char*>(&__x) + 8, 8);
2244  return __r;
2245  }
2246 
2247 // }}}
2248 // __lo/__hi128{{{
2249 template <typename _Tp>
2250  _GLIBCXX_SIMD_INTRINSIC constexpr auto
2251  __lo128(_Tp __x)
2252  { return __extract<0, sizeof(_Tp) / 16>(__x); }
2253 
2254 template <typename _Tp>
2255  _GLIBCXX_SIMD_INTRINSIC constexpr auto
2256  __hi128(_Tp __x)
2257  {
2258  static_assert(sizeof(__x) == 32);
2259  return __extract<1, 2>(__x);
2260  }
2261 
2262 // }}}
2263 // __lo/__hi256{{{
2264 template <typename _Tp>
2265  _GLIBCXX_SIMD_INTRINSIC constexpr auto
2266  __lo256(_Tp __x)
2267  {
2268  static_assert(sizeof(__x) == 64);
2269  return __extract<0, 2>(__x);
2270  }
2271 
2272 template <typename _Tp>
2273  _GLIBCXX_SIMD_INTRINSIC constexpr auto
2274  __hi256(_Tp __x)
2275  {
2276  static_assert(sizeof(__x) == 64);
2277  return __extract<1, 2>(__x);
2278  }
2279 
2280 // }}}
2281 // __auto_bitcast{{{
2282 template <typename _Tp>
2283  struct _AutoCast
2284  {
2285  static_assert(__is_vector_type_v<_Tp>);
2286 
2287  const _Tp __x;
2288 
2289  template <typename _Up, typename _UVT = _VectorTraits<_Up>>
2290  _GLIBCXX_SIMD_INTRINSIC constexpr operator _Up() const
2291  { return __intrin_bitcast<typename _UVT::type>(__x); }
2292  };
2293 
2294 template <typename _Tp>
2295  _GLIBCXX_SIMD_INTRINSIC constexpr _AutoCast<_Tp>
2296  __auto_bitcast(const _Tp& __x)
2297  { return {__x}; }
2298 
2299 template <typename _Tp, size_t _Np>
2300  _GLIBCXX_SIMD_INTRINSIC constexpr
2301  _AutoCast<typename _SimdWrapper<_Tp, _Np>::_BuiltinType>
2302  __auto_bitcast(const _SimdWrapper<_Tp, _Np>& __x)
2303  { return {__x._M_data}; }
2304 
2305 // }}}
2306 // ^^^ ---- builtin vector types [[gnu::vector_size(N)]] and operations ---- ^^^
2307 
2308 #if _GLIBCXX_SIMD_HAVE_SSE_ABI
2309 // __bool_storage_member_type{{{
#if _GLIBCXX_SIMD_HAVE_AVX512F && _GLIBCXX_SIMD_X86INTRIN
// Maps an element count to the storage type of a bitmask with that many bits.
// Powers of two up to 64 are specialized explicitly; the primary template
// handles every other size by forwarding to the next power of two.
template <size_t _Size>
  struct __bool_storage_member_type
  {
    static_assert((_Size & (_Size - 1)) != 0,
                  "This trait may only be used for non-power-of-2 sizes. "
                  "Power-of-2 sizes must be specialized.");

    using type
      = typename __bool_storage_member_type<std::__bit_ceil(_Size)>::type;
  };

// A single element is stored as a plain bool.
template <>
  struct __bool_storage_member_type<1>
  { using type = bool; };

// 2, 4 and 8 elements share the 8-bit mask type.
template <>
  struct __bool_storage_member_type<2>
  { using type = __mmask8; };

template <>
  struct __bool_storage_member_type<4>
  { using type = __mmask8; };

template <>
  struct __bool_storage_member_type<8>
  { using type = __mmask8; };

template <>
  struct __bool_storage_member_type<16>
  { using type = __mmask16; };

template <>
  struct __bool_storage_member_type<32>
  { using type = __mmask32; };

template <>
  struct __bool_storage_member_type<64>
  { using type = __mmask64; };
#endif // _GLIBCXX_SIMD_HAVE_AVX512F
2342 
2343 // }}}
2344 // __intrinsic_type (x86){{{
2345 // the following excludes bool via __is_vectorizable
#if _GLIBCXX_SIMD_HAVE_SSE
// x86 intrinsic vector type for up to 64 bytes of _Tp. The requested size is
// rounded up to 16, 32 or 64 bytes. Integral element types all map to a
// vector of `long long int`; other element types keep _Tp.
template <typename _Tp, size_t _Bytes>
  struct __intrinsic_type<_Tp, _Bytes, enable_if_t<__is_vectorizable_v<_Tp> && _Bytes <= 64>>
  {
    static_assert(!is_same_v<_Tp, long double>,
                  "no __intrinsic_type support for long double on x86");

    static constexpr size_t _S_VBytes
      = (_Bytes > 32) ? 64 : ((_Bytes > 16) ? 32 : 16);

    using type [[__gnu__::__vector_size__(_S_VBytes)]]
      = conditional_t<is_integral_v<_Tp>, long long int, _Tp>;
  };
#endif // _GLIBCXX_SIMD_HAVE_SSE
2359 
2360 // }}}
2361 #endif // _GLIBCXX_SIMD_HAVE_SSE_ABI
2362 // __intrinsic_type (ARM){{{
2363 #if _GLIBCXX_SIMD_HAVE_NEON
2364 template <>
2365  struct __intrinsic_type<float, 8, void>
2366  { using type = float32x2_t; };
2367 
2368 template <>
2369  struct __intrinsic_type<float, 16, void>
2370  { using type = float32x4_t; };
2371 
2372 template <>
2373  struct __intrinsic_type<double, 8, void>
2374  {
2375 #if _GLIBCXX_SIMD_HAVE_NEON_A64
2376  using type = float64x1_t;
2377 #endif
2378  };
2379 
2380 template <>
2381  struct __intrinsic_type<double, 16, void>
2382  {
2383 #if _GLIBCXX_SIMD_HAVE_NEON_A64
2384  using type = float64x2_t;
2385 #endif
2386  };
2387 
2388 #define _GLIBCXX_SIMD_ARM_INTRIN(_Bits, _Np) \
2389 template <> \
2390  struct __intrinsic_type<__int_with_sizeof_t<_Bits / 8>, \
2391  _Np * _Bits / 8, void> \
2392  { using type = int##_Bits##x##_Np##_t; }; \
2393 template <> \
2394  struct __intrinsic_type<make_unsigned_t<__int_with_sizeof_t<_Bits / 8>>, \
2395  _Np * _Bits / 8, void> \
2396  { using type = uint##_Bits##x##_Np##_t; }
2397 _GLIBCXX_SIMD_ARM_INTRIN(8, 8);
2398 _GLIBCXX_SIMD_ARM_INTRIN(8, 16);
2399 _GLIBCXX_SIMD_ARM_INTRIN(16, 4);
2400 _GLIBCXX_SIMD_ARM_INTRIN(16, 8);
2401 _GLIBCXX_SIMD_ARM_INTRIN(32, 2);
2402 _GLIBCXX_SIMD_ARM_INTRIN(32, 4);
2403 _GLIBCXX_SIMD_ARM_INTRIN(64, 1);
2404 _GLIBCXX_SIMD_ARM_INTRIN(64, 2);
2405 #undef _GLIBCXX_SIMD_ARM_INTRIN
2406 
2407 template <typename _Tp, size_t _Bytes>
2408  struct __intrinsic_type<_Tp, _Bytes, enable_if_t<__is_vectorizable_v<_Tp> && _Bytes <= 16>>
2409  {
2410  static constexpr int _SVecBytes = _Bytes <= 8 ? 8 : 16;
2411 
2412  using _Ip = __int_for_sizeof_t<_Tp>;
2413 
2414  using _Up = conditional_t<
2415  is_floating_point_v<_Tp>, _Tp,
2416  conditional_t<is_unsigned_v<_Tp>, make_unsigned_t<_Ip>, _Ip>>;
2417 
2418  static_assert(!is_same_v<_Tp, _Up> || _SVecBytes != _Bytes,
2419  "should use explicit specialization above");
2420 
2421  using type = typename __intrinsic_type<_Up, _SVecBytes>::type;
2422  };
2423 #endif // _GLIBCXX_SIMD_HAVE_NEON
2424 
2425 // }}}
2426 // __intrinsic_type (PPC){{{
2427 #ifdef __ALTIVEC__
2428 template <typename _Tp>
2429  struct __intrinsic_type_impl;
2430 
2431 #define _GLIBCXX_SIMD_PPC_INTRIN(_Tp) \
2432  template <> \
2433  struct __intrinsic_type_impl<_Tp> { using type = __vector _Tp; }
2434 _GLIBCXX_SIMD_PPC_INTRIN(float);
2435 #ifdef __VSX__
2436 _GLIBCXX_SIMD_PPC_INTRIN(double);
2437 #endif
2438 _GLIBCXX_SIMD_PPC_INTRIN(signed char);
2439 _GLIBCXX_SIMD_PPC_INTRIN(unsigned char);
2440 _GLIBCXX_SIMD_PPC_INTRIN(signed short);
2441 _GLIBCXX_SIMD_PPC_INTRIN(unsigned short);
2442 _GLIBCXX_SIMD_PPC_INTRIN(signed int);
2443 _GLIBCXX_SIMD_PPC_INTRIN(unsigned int);
2444 #if defined __VSX__ || __SIZEOF_LONG__ == 4
2445 _GLIBCXX_SIMD_PPC_INTRIN(signed long);
2446 _GLIBCXX_SIMD_PPC_INTRIN(unsigned long);
2447 #endif
2448 #ifdef __VSX__
2449 _GLIBCXX_SIMD_PPC_INTRIN(signed long long);
2450 _GLIBCXX_SIMD_PPC_INTRIN(unsigned long long);
2451 #endif
2452 #undef _GLIBCXX_SIMD_PPC_INTRIN
2453 
2454 template <typename _Tp, size_t _Bytes>
2455  struct __intrinsic_type<_Tp, _Bytes, enable_if_t<__is_vectorizable_v<_Tp> && _Bytes <= 16>>
2456  {
2457  static constexpr bool _S_is_ldouble = is_same_v<_Tp, long double>;
2458 
2459  // allow _Tp == long double with -mlong-double-64
2460  static_assert(!(_S_is_ldouble && sizeof(long double) > sizeof(double)),
2461  "no __intrinsic_type support for 128-bit floating point on PowerPC");
2462 
2463 #ifndef __VSX__
2464  static_assert(!(is_same_v<_Tp, double>
2465  || (_S_is_ldouble && sizeof(long double) == sizeof(double))),
2466  "no __intrinsic_type support for 64-bit floating point on PowerPC w/o VSX");
2467 #endif
2468 
2469  static constexpr auto __element_type()
2470  {
2471  if constexpr (is_floating_point_v<_Tp>)
2472  {
2473  if constexpr (_S_is_ldouble)
2474  return double {};
2475  else
2476  return _Tp {};
2477  }
2478  else if constexpr (is_signed_v<_Tp>)
2479  {
2480  if constexpr (sizeof(_Tp) == sizeof(_SChar))
2481  return _SChar {};
2482  else if constexpr (sizeof(_Tp) == sizeof(short))
2483  return short {};
2484  else if constexpr (sizeof(_Tp) == sizeof(int))
2485  return int {};
2486  else if constexpr (sizeof(_Tp) == sizeof(_LLong))
2487  return _LLong {};
2488  }
2489  else
2490  {
2491  if constexpr (sizeof(_Tp) == sizeof(_UChar))
2492  return _UChar {};
2493  else if constexpr (sizeof(_Tp) == sizeof(_UShort))
2494  return _UShort {};
2495  else if constexpr (sizeof(_Tp) == sizeof(_UInt))
2496  return _UInt {};
2497  else if constexpr (sizeof(_Tp) == sizeof(_ULLong))
2498  return _ULLong {};
2499  }
2500  }
2501 
2502  using type = typename __intrinsic_type_impl<decltype(__element_type())>::type;
2503  };
2504 #endif // __ALTIVEC__
2505 
2506 // }}}
2507 // _SimdWrapper<bool>{{{1
2508 template <size_t _Width>
2509  struct _SimdWrapper<bool, _Width,
2510  void_t<typename __bool_storage_member_type<_Width>::type>>
2511  {
2512  using _BuiltinType = typename __bool_storage_member_type<_Width>::type;
2513  using value_type = bool;
2514 
2515  static constexpr size_t _S_full_size = sizeof(_BuiltinType) * __CHAR_BIT__;
2516 
2517  _GLIBCXX_SIMD_INTRINSIC constexpr _SimdWrapper<bool, _S_full_size>
2518  __as_full_vector() const
2519  { return _M_data; }
2520 
2521  _GLIBCXX_SIMD_INTRINSIC constexpr
2522  _SimdWrapper() = default;
2523 
2524  _GLIBCXX_SIMD_INTRINSIC constexpr
2525  _SimdWrapper(_BuiltinType __k) : _M_data(__k) {};
2526 
2527  _GLIBCXX_SIMD_INTRINSIC
2528  operator const _BuiltinType&() const
2529  { return _M_data; }
2530 
2531  _GLIBCXX_SIMD_INTRINSIC
2532  operator _BuiltinType&()
2533  { return _M_data; }
2534 
2535  _GLIBCXX_SIMD_INTRINSIC _BuiltinType
2536  __intrin() const
2537  { return _M_data; }
2538 
2539  _GLIBCXX_SIMD_INTRINSIC constexpr value_type
2540  operator[](size_t __i) const
2541  { return _M_data & (_BuiltinType(1) << __i); }
2542 
2543  template <size_t __i>
2544  _GLIBCXX_SIMD_INTRINSIC constexpr value_type
2545  operator[](_SizeConstant<__i>) const
2546  { return _M_data & (_BuiltinType(1) << __i); }
2547 
2548  _GLIBCXX_SIMD_INTRINSIC constexpr void
2549  _M_set(size_t __i, value_type __x)
2550  {
2551  if (__x)
2552  _M_data |= (_BuiltinType(1) << __i);
2553  else
2554  _M_data &= ~(_BuiltinType(1) << __i);
2555  }
2556 
2557  _GLIBCXX_SIMD_INTRINSIC constexpr bool
2558  _M_is_constprop() const
2559  { return __builtin_constant_p(_M_data); }
2560 
2561  _GLIBCXX_SIMD_INTRINSIC constexpr bool
2562  _M_is_constprop_none_of() const
2563  {
2564  if (__builtin_constant_p(_M_data))
2565  {
2566  constexpr int __nbits = sizeof(_BuiltinType) * __CHAR_BIT__;
2567  constexpr _BuiltinType __active_mask
2568  = ~_BuiltinType() >> (__nbits - _Width);
2569  return (_M_data & __active_mask) == 0;
2570  }
2571  return false;
2572  }
2573 
2574  _GLIBCXX_SIMD_INTRINSIC constexpr bool
2575  _M_is_constprop_all_of() const
2576  {
2577  if (__builtin_constant_p(_M_data))
2578  {
2579  constexpr int __nbits = sizeof(_BuiltinType) * __CHAR_BIT__;
2580  constexpr _BuiltinType __active_mask
2581  = ~_BuiltinType() >> (__nbits - _Width);
2582  return (_M_data & __active_mask) == __active_mask;
2583  }
2584  return false;
2585  }
2586 
2587  _BuiltinType _M_data;
2588  };
2589 
2590 // _SimdWrapperBase{{{1
2591 template <bool _MustZeroInitPadding, typename _BuiltinType>
2592  struct _SimdWrapperBase;
2593 
2594 template <typename _BuiltinType>
2595  struct _SimdWrapperBase<false, _BuiltinType> // no padding or no SNaNs
2596  {
2597  _GLIBCXX_SIMD_INTRINSIC constexpr
2598  _SimdWrapperBase() = default;
2599 
2600  _GLIBCXX_SIMD_INTRINSIC constexpr
2601  _SimdWrapperBase(_BuiltinType __init) : _M_data(__init) {}
2602 
2603  _BuiltinType _M_data;
2604  };
2605 
2606 template <typename _BuiltinType>
2607  struct _SimdWrapperBase<true, _BuiltinType> // with padding that needs to
2608  // never become SNaN
2609  {
2610  _GLIBCXX_SIMD_INTRINSIC constexpr
2611  _SimdWrapperBase() : _M_data() {}
2612 
2613  _GLIBCXX_SIMD_INTRINSIC constexpr
2614  _SimdWrapperBase(_BuiltinType __init) : _M_data(__init) {}
2615 
2616  _BuiltinType _M_data;
2617  };
2618 
2619 // }}}
2620 // _SimdWrapper{{{
2621 template <typename _Tp, size_t _Width>
2622  struct _SimdWrapper<
2623  _Tp, _Width,
2624  void_t<__vector_type_t<_Tp, _Width>, __intrinsic_type_t<_Tp, _Width>>>
2625  : _SimdWrapperBase<__has_iec559_behavior<__signaling_NaN, _Tp>::value
2626  && sizeof(_Tp) * _Width
2627  == sizeof(__vector_type_t<_Tp, _Width>),
2628  __vector_type_t<_Tp, _Width>>
2629  {
2630  using _Base
2631  = _SimdWrapperBase<__has_iec559_behavior<__signaling_NaN, _Tp>::value
2632  && sizeof(_Tp) * _Width
2633  == sizeof(__vector_type_t<_Tp, _Width>),
2634  __vector_type_t<_Tp, _Width>>;
2635 
2636  static_assert(__is_vectorizable_v<_Tp>);
2637  static_assert(_Width >= 2); // 1 doesn't make sense, use _Tp directly then
2638 
2639  using _BuiltinType = __vector_type_t<_Tp, _Width>;
2640  using value_type = _Tp;
2641 
2642  static inline constexpr size_t _S_full_size
2643  = sizeof(_BuiltinType) / sizeof(value_type);
2644  static inline constexpr int _S_size = _Width;
2645  static inline constexpr bool _S_is_partial = _S_full_size != _S_size;
2646 
2647  using _Base::_M_data;
2648 
2649  _GLIBCXX_SIMD_INTRINSIC constexpr _SimdWrapper<_Tp, _S_full_size>
2650  __as_full_vector() const
2651  { return _M_data; }
2652 
2653  _GLIBCXX_SIMD_INTRINSIC constexpr
2654  _SimdWrapper(initializer_list<_Tp> __init)
2655  : _Base(__generate_from_n_evaluations<_Width, _BuiltinType>(
2656  [&](auto __i) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
2657  return __init.begin()[__i.value];
2658  })) {}
2659 
2660  _GLIBCXX_SIMD_INTRINSIC constexpr
2661  _SimdWrapper() = default;
2662 
2663  _GLIBCXX_SIMD_INTRINSIC constexpr
2664  _SimdWrapper(const _SimdWrapper&) = default;
2665 
2666  _GLIBCXX_SIMD_INTRINSIC constexpr
2667  _SimdWrapper(_SimdWrapper&&) = default;
2668 
2669  _GLIBCXX_SIMD_INTRINSIC constexpr _SimdWrapper&
2670  operator=(const _SimdWrapper&) = default;
2671 
2672  _GLIBCXX_SIMD_INTRINSIC constexpr _SimdWrapper&
2673  operator=(_SimdWrapper&&) = default;
2674 
2675  template <typename _V, typename = enable_if_t<disjunction_v<
2676  is_same<_V, __vector_type_t<_Tp, _Width>>,
2677  is_same<_V, __intrinsic_type_t<_Tp, _Width>>>>>
2678  _GLIBCXX_SIMD_INTRINSIC constexpr
2679  _SimdWrapper(_V __x)
2680  // __vector_bitcast can convert e.g. __m128 to __vector(2) float
2681  : _Base(__vector_bitcast<_Tp, _Width>(__x)) {}
2682 
2683  template <typename... _As,
2684  typename = enable_if_t<((is_same_v<simd_abi::scalar, _As> && ...)
2685  && sizeof...(_As) <= _Width)>>
2686  _GLIBCXX_SIMD_INTRINSIC constexpr
2687  operator _SimdTuple<_Tp, _As...>() const
2688  {
2689  return __generate_from_n_evaluations<sizeof...(_As), _SimdTuple<_Tp, _As...>>(
2690  [&](auto __i) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA
2691  { return _M_data[int(__i)]; });
2692  }
2693 
2694  _GLIBCXX_SIMD_INTRINSIC constexpr
2695  operator const _BuiltinType&() const
2696  { return _M_data; }
2697 
2698  _GLIBCXX_SIMD_INTRINSIC constexpr
2699  operator _BuiltinType&()
2700  { return _M_data; }
2701 
2702  _GLIBCXX_SIMD_INTRINSIC constexpr _Tp
2703  operator[](size_t __i) const
2704  { return _M_data[__i]; }
2705 
2706  template <size_t __i>
2707  _GLIBCXX_SIMD_INTRINSIC constexpr _Tp
2708  operator[](_SizeConstant<__i>) const
2709  { return _M_data[__i]; }
2710 
2711  _GLIBCXX_SIMD_INTRINSIC constexpr void
2712  _M_set(size_t __i, _Tp __x)
2713  {
2714  if (__builtin_is_constant_evaluated())
2715  _M_data = __generate_from_n_evaluations<_Width, _BuiltinType>([&](auto __j) {
2716  return __j == __i ? __x : _M_data[__j()];
2717  });
2718  else
2719  _M_data[__i] = __x;
2720  }
2721 
2722  _GLIBCXX_SIMD_INTRINSIC
2723  constexpr bool
2724  _M_is_constprop() const
2725  { return __builtin_constant_p(_M_data); }
2726 
2727  _GLIBCXX_SIMD_INTRINSIC constexpr bool
2728  _M_is_constprop_none_of() const
2729  {
2730  if (__builtin_constant_p(_M_data))
2731  {
2732  bool __r = true;
2733  if constexpr (is_floating_point_v<_Tp>)
2734  {
2735  using _Ip = __int_for_sizeof_t<_Tp>;
2736  const auto __intdata = __vector_bitcast<_Ip>(_M_data);
2737  __execute_n_times<_Width>(
2738  [&](auto __i) { __r &= __intdata[__i.value] == _Ip(); });
2739  }
2740  else
2741  __execute_n_times<_Width>(
2742  [&](auto __i) { __r &= _M_data[__i.value] == _Tp(); });
2743  if (__builtin_constant_p(__r))
2744  return __r;
2745  }
2746  return false;
2747  }
2748 
2749  _GLIBCXX_SIMD_INTRINSIC constexpr bool
2750  _M_is_constprop_all_of() const
2751  {
2752  if (__builtin_constant_p(_M_data))
2753  {
2754  bool __r = true;
2755  if constexpr (is_floating_point_v<_Tp>)
2756  {
2757  using _Ip = __int_for_sizeof_t<_Tp>;
2758  const auto __intdata = __vector_bitcast<_Ip>(_M_data);
2759  __execute_n_times<_Width>(
2760  [&](auto __i) { __r &= __intdata[__i.value] == ~_Ip(); });
2761  }
2762  else
2763  __execute_n_times<_Width>(
2764  [&](auto __i) { __r &= _M_data[__i.value] == ~_Tp(); });
2765  if (__builtin_constant_p(__r))
2766  return __r;
2767  }
2768  return false;
2769  }
2770  };
2771 
2772 // }}}
2773 
2774 // __vectorized_sizeof {{{
2775 template <typename _Tp>
2776  constexpr size_t
2777  __vectorized_sizeof()
2778  {
2779  if constexpr (!__is_vectorizable_v<_Tp>)
2780  return 0;
2781 
2782  if constexpr (sizeof(_Tp) <= 8)
2783  {
2784  // X86:
2785  if constexpr (__have_avx512bw)
2786  return 64;
2787  if constexpr (__have_avx512f && sizeof(_Tp) >= 4)
2788  return 64;
2789  if constexpr (__have_avx2)
2790  return 32;
2791  if constexpr (__have_avx && is_floating_point_v<_Tp>)
2792  return 32;
2793  if constexpr (__have_sse2)
2794  return 16;
2795  if constexpr (__have_sse && is_same_v<_Tp, float>)
2796  return 16;
2797  /* The following is too much trouble because of mixed MMX and x87 code.
2798  * While nothing here explicitly calls MMX instructions of registers,
2799  * they are still emitted but no EMMS cleanup is done.
2800  if constexpr (__have_mmx && sizeof(_Tp) <= 4 && is_integral_v<_Tp>)
2801  return 8;
2802  */
2803 
2804  // PowerPC:
2805  if constexpr (__have_power8vec
2806  || (__have_power_vmx && (sizeof(_Tp) < 8))
2807  || (__have_power_vsx && is_floating_point_v<_Tp>) )
2808  return 16;
2809 
2810  // ARM:
2811  if constexpr (__have_neon_a64
2812  || (__have_neon_a32 && !is_same_v<_Tp, double>) )
2813  return 16;
2814  if constexpr (__have_neon
2815  && sizeof(_Tp) < 8
2816  // Only allow fp if the user allows non-ICE559 fp (e.g.
2817  // via -ffast-math). ARMv7 NEON fp is not conforming to
2818  // IEC559.
2819  && (__support_neon_float || !is_floating_point_v<_Tp>))
2820  return 16;
2821  }
2822 
2823  return sizeof(_Tp);
2824  }
2825 
2826 // }}}
2827 namespace simd_abi {
2828 // most of simd_abi is defined in simd_detail.h
2829 template <typename _Tp>
2830  inline constexpr int max_fixed_size
2831  = (__have_avx512bw && sizeof(_Tp) == 1) ? 64 : 32;
2832 
2833 // compatible {{{
2834 #if defined __x86_64__ || defined __aarch64__
2835 template <typename _Tp>
2836  using compatible = conditional_t<(sizeof(_Tp) <= 8), _VecBuiltin<16>, scalar>;
2837 #elif defined __ARM_NEON
2838 // FIXME: not sure, probably needs to be scalar (or dependent on the hard-float
2839 // ABI?)
2840 template <typename _Tp>
2841  using compatible
2842  = conditional_t<(sizeof(_Tp) < 8
2843  && (__support_neon_float || !is_floating_point_v<_Tp>)),
2844  _VecBuiltin<16>, scalar>;
2845 #else
2846 template <typename>
2847  using compatible = scalar;
2848 #endif
2849 
2850 // }}}
2851 // native {{{
2852 template <typename _Tp>
2853  constexpr auto
2854  __determine_native_abi()
2855  {
2856  constexpr size_t __bytes = __vectorized_sizeof<_Tp>();
2857  if constexpr (__bytes == sizeof(_Tp))
2858  return static_cast<scalar*>(nullptr);
2859  else if constexpr (__have_avx512vl || (__have_avx512f && __bytes == 64))
2860  return static_cast<_VecBltnBtmsk<__bytes>*>(nullptr);
2861  else
2862  return static_cast<_VecBuiltin<__bytes>*>(nullptr);
2863  }
2864 
2865 template <typename _Tp, typename = enable_if_t<__is_vectorizable_v<_Tp>>>
2866  using native = remove_pointer_t<decltype(__determine_native_abi<_Tp>())>;
2867 
2868 // }}}
2869 // __default_abi {{{
2870 #if defined _GLIBCXX_SIMD_DEFAULT_ABI
2871 template <typename _Tp>
2872  using __default_abi = _GLIBCXX_SIMD_DEFAULT_ABI<_Tp>;
2873 #else
2874 template <typename _Tp>
2875  using __default_abi = compatible<_Tp>;
2876 #endif
2877 
2878 // }}}
2879 } // namespace simd_abi
2880 
2881 // traits {{{1
2882 template <typename _Tp>
2883  struct is_simd_flag_type
2884  : false_type
2885  {};
2886 
2887 template <>
2888  struct is_simd_flag_type<element_aligned_tag>
2889  : true_type
2890  {};
2891 
2892 template <>
2893  struct is_simd_flag_type<vector_aligned_tag>
2894  : true_type
2895  {};
2896 
2897 template <size_t _Np>
2898  struct is_simd_flag_type<overaligned_tag<_Np>>
2899  : __bool_constant<(_Np > 0) and __has_single_bit(_Np)>
2900  {};
2901 
2902 template <typename _Tp>
2903  inline constexpr bool is_simd_flag_type_v = is_simd_flag_type<_Tp>::value;
2904 
2905 template <typename _Tp, typename = enable_if_t<is_simd_flag_type_v<_Tp>>>
2906  using _IsSimdFlagType = _Tp;
2907 
2908 // is_abi_tag {{{2
// Trait: a type is an ABI tag if it has a member type _IsValidAbiTag; the
// trait then takes its value from that member type. All other types yield
// false.
template <typename _Tp, typename = void>
  struct is_abi_tag
  : false_type
  {};

template <typename _Tp>
  struct is_abi_tag<_Tp, void_t<typename _Tp::_IsValidAbiTag>>
  : public _Tp::_IsValidAbiTag
  {};

template <typename _Tp>
  inline constexpr bool is_abi_tag_v = is_abi_tag<_Tp>::value;
2918 
2919 // is_simd(_mask) {{{2
// Primary templates of the is_simd / is_simd_mask traits: false for every
// type unless a specialization says otherwise.
template <typename _Tp>
  struct is_simd
  : public false_type
  {};

template <typename _Tp>
  inline constexpr bool is_simd_v = is_simd<_Tp>::value;

template <typename _Tp>
  struct is_simd_mask
  : public false_type
  {};

template <typename _Tp>
  inline constexpr bool is_simd_mask_v = is_simd_mask<_Tp>::value;
2931 
2932 // simd_size {{{2
2933 template <typename _Tp, typename _Abi, typename = void>
2934  struct __simd_size_impl {};
2935 
2936 template <typename _Tp, typename _Abi>
2937  struct __simd_size_impl<
2938  _Tp, _Abi,
2939  enable_if_t<conjunction_v<__is_vectorizable<_Tp>, is_abi_tag<_Abi>>>>
2940  : _SizeConstant<_Abi::template _S_size<_Tp>> {};
2941 
2942 template <typename _Tp, typename _Abi = simd_abi::__default_abi<_Tp>>
2943  struct simd_size : __simd_size_impl<_Tp, _Abi> {};
2944 
2945 template <typename _Tp, typename _Abi = simd_abi::__default_abi<_Tp>>
2946  inline constexpr size_t simd_size_v = simd_size<_Tp, _Abi>::value;
2947 
2948 // simd_abi::deduce {{{2
// Declared here only; simd_abi::deduce below forwards to it.
template <typename _Tp, size_t _Np, typename = void>
  struct __deduce_impl;

namespace simd_abi {
/**
 * Deduces an ABI tag for `_Np` elements of type `_Tp`.
 *
 * @tparam _Tp   The requested `value_type` for the elements.
 * @tparam _Np   The requested number of elements.
 * @tparam _Abis Ignored: this implementation cannot make any use of it.
 *               Either a good native ABI is matched and used as the `type`
 *               alias, or the `fixed_size<_Np>` ABI is used, which internally
 *               is built from the best matching native ABIs.
 */
template <typename _Tp, size_t _Np, typename...>
  struct deduce : __deduce_impl<_Tp, _Np>
  {};

/// Shorthand for `deduce<_Tp, _Np, _Abis...>::type`.
template <typename _Tp, size_t _Np, typename... _Abis>
  using deduce_t = typename deduce<_Tp, _Np, _Abis...>::type;
} // namespace simd_abi
2967 
2968 // }}}2
2969 // rebind_simd {{{2
2970 template <typename _Tp, typename _V, typename = void>
2971  struct rebind_simd;
2972 
2973 template <typename _Tp, typename _Up, typename _Abi>
2974  struct rebind_simd<_Tp, simd<_Up, _Abi>,
2975  void_t<simd_abi::deduce_t<_Tp, simd_size_v<_Up, _Abi>, _Abi>>>
2976  { using type = simd<_Tp, simd_abi::deduce_t<_Tp, simd_size_v<_Up, _Abi>, _Abi>>; };
2977 
2978 template <typename _Tp, typename _Up, typename _Abi>
2979  struct rebind_simd<_Tp, simd_mask<_Up, _Abi>,
2980  void_t<simd_abi::deduce_t<_Tp, simd_size_v<_Up, _Abi>, _Abi>>>
2981  { using type = simd_mask<_Tp, simd_abi::deduce_t<_Tp, simd_size_v<_Up, _Abi>, _Abi>>; };
2982 
2983 template <typename _Tp, typename _V>
2984  using rebind_simd_t = typename rebind_simd<_Tp, _V>::type;
2985 
2986 // resize_simd {{{2
2987 template <int _Np, typename _V, typename = void>
2988  struct resize_simd;
2989 
2990 template <int _Np, typename _Tp, typename _Abi>
2991  struct resize_simd<_Np, simd<_Tp, _Abi>, void_t<simd_abi::deduce_t<_Tp, _Np, _Abi>>>
2992  { using type = simd<_Tp, simd_abi::deduce_t<_Tp, _Np, _Abi>>; };
2993 
2994 template <int _Np, typename _Tp, typename _Abi>
2995  struct resize_simd<_Np, simd_mask<_Tp, _Abi>, void_t<simd_abi::deduce_t<_Tp, _Np, _Abi>>>
2996  { using type = simd_mask<_Tp, simd_abi::deduce_t<_Tp, _Np, _Abi>>; };
2997 
2998 template <int _Np, typename _V>
2999  using resize_simd_t = typename resize_simd<_Np, _V>::type;
3000 
3001 // }}}2
3002 // memory_alignment {{{2
3003 template <typename _Tp, typename _Up = typename _Tp::value_type>
3004  struct memory_alignment
3005  : public _SizeConstant<vector_aligned_tag::_S_alignment<_Tp, _Up>> {};
3006 
3007 template <typename _Tp, typename _Up = typename _Tp::value_type>
3008  inline constexpr size_t memory_alignment_v = memory_alignment<_Tp, _Up>::value;
3009 
3010 // class template simd [simd] {{{1
3011 template <typename _Tp, typename _Abi = simd_abi::__default_abi<_Tp>>
3012  class simd;
3013 
3014 template <typename _Tp, typename _Abi>
3015  struct is_simd<simd<_Tp, _Abi>> : public true_type {};
3016 
3017 template <typename _Tp>
3018  using native_simd = simd<_Tp, simd_abi::native<_Tp>>;
3019 
3020 template <typename _Tp, int _Np>
3021  using fixed_size_simd = simd<_Tp, simd_abi::fixed_size<_Np>>;
3022 
3023 template <typename _Tp, size_t _Np>
3024  using __deduced_simd = simd<_Tp, simd_abi::deduce_t<_Tp, _Np>>;
3025 
3026 // class template simd_mask [simd_mask] {{{1
3027 template <typename _Tp, typename _Abi = simd_abi::__default_abi<_Tp>>
3028  class simd_mask;
3029 
3030 template <typename _Tp, typename _Abi>
3031  struct is_simd_mask<simd_mask<_Tp, _Abi>> : public true_type {};
3032 
3033 template <typename _Tp>
3034  using native_simd_mask = simd_mask<_Tp, simd_abi::native<_Tp>>;
3035 
3036 template <typename _Tp, int _Np>
3037  using fixed_size_simd_mask = simd_mask<_Tp, simd_abi::fixed_size<_Np>>;
3038 
3039 template <typename _Tp, size_t _Np>
3040  using __deduced_simd_mask = simd_mask<_Tp, simd_abi::deduce_t<_Tp, _Np>>;
3041 
3042 // casts [simd.casts] {{{1
3043 // static_simd_cast {{{2
3044 template <typename _Tp, typename _Up, typename _Ap, bool = is_simd_v<_Tp>, typename = void>
3045  struct __static_simd_cast_return_type;
3046 
3047 template <typename _Tp, typename _A0, typename _Up, typename _Ap>
3048  struct __static_simd_cast_return_type<simd_mask<_Tp, _A0>, _Up, _Ap, false, void>
3049  : __static_simd_cast_return_type<simd<_Tp, _A0>, _Up, _Ap> {};
3050 
3051 template <typename _Tp, typename _Up, typename _Ap>
3052  struct __static_simd_cast_return_type<
3053  _Tp, _Up, _Ap, true, enable_if_t<_Tp::size() == simd_size_v<_Up, _Ap>>>
3054  { using type = _Tp; };
3055 
3056 template <typename _Tp, typename _Ap>
3057  struct __static_simd_cast_return_type<_Tp, _Tp, _Ap, false,
3058 #ifdef _GLIBCXX_SIMD_FIX_P2TS_ISSUE66
3059  enable_if_t<__is_vectorizable_v<_Tp>>
3060 #else
3061  void
3062 #endif
3063  >
3064  { using type = simd<_Tp, _Ap>; };
3065 
// Like make_signed, but usable with any type: non-integral types are passed
// through unchanged.
template <typename _Tp, typename = void>
  struct __safe_make_signed
  {
    using type = _Tp;
  };

template <typename _Tp>
  struct __safe_make_signed<_Tp, enable_if_t<is_integral<_Tp>::value>>
  {
    // the extra make_unsigned is because of PR85951
    using type
      = typename make_signed<typename make_unsigned<_Tp>::type>::type;
  };

template <typename _Tp>
  using safe_make_signed_t = typename __safe_make_signed<_Tp>::type;
3078 
3079 template <typename _Tp, typename _Up, typename _Ap>
3080  struct __static_simd_cast_return_type<_Tp, _Up, _Ap, false,
3081 #ifdef _GLIBCXX_SIMD_FIX_P2TS_ISSUE66
3082  enable_if_t<__is_vectorizable_v<_Tp>>
3083 #else
3084  void
3085 #endif
3086  >
3087  {
3088  using type = conditional_t<
3089  (is_integral_v<_Up> && is_integral_v<_Tp> &&
3090 #ifndef _GLIBCXX_SIMD_FIX_P2TS_ISSUE65
3091  is_signed_v<_Up> != is_signed_v<_Tp> &&
3092 #endif
3093  is_same_v<safe_make_signed_t<_Up>, safe_make_signed_t<_Tp>>),
3094  simd<_Tp, _Ap>, fixed_size_simd<_Tp, simd_size_v<_Up, _Ap>>>;
3095  };
3096 
3097 template <typename _Tp, typename _Up, typename _Ap,
3098  typename _R
3099  = typename __static_simd_cast_return_type<_Tp, _Up, _Ap>::type>
3100  _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR _R
3101  static_simd_cast(const simd<_Up, _Ap>& __x)
3102  {
3103  if constexpr (is_same<_R, simd<_Up, _Ap>>::value)
3104  return __x;
3105  else
3106  {
3107  _SimdConverter<_Up, _Ap, typename _R::value_type, typename _R::abi_type>
3108  __c;
3109  return _R(__private_init, __c(__data(__x)));
3110  }
3111  }
3112 
3113 namespace __proposed {
3114 template <typename _Tp, typename _Up, typename _Ap,
3115  typename _R
3116  = typename __static_simd_cast_return_type<_Tp, _Up, _Ap>::type>
3117  _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR typename _R::mask_type
3118  static_simd_cast(const simd_mask<_Up, _Ap>& __x)
3119  {
3120  using _RM = typename _R::mask_type;
3121  return {__private_init, _RM::abi_type::_MaskImpl::template _S_convert<
3122  typename _RM::simd_type::value_type>(__x)};
3123  }
3124 
3125 template <typename _To, typename _Up, typename _Abi>
3126  _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR
3127  _To
3128  simd_bit_cast(const simd<_Up, _Abi>& __x)
3129  {
3130  using _Tp = typename _To::value_type;
3131  using _ToMember = typename _SimdTraits<_Tp, typename _To::abi_type>::_SimdMember;
3132  using _From = simd<_Up, _Abi>;
3133  using _FromMember = typename _SimdTraits<_Up, _Abi>::_SimdMember;
3134  // with concepts, the following should be constraints
3135  static_assert(sizeof(_To) == sizeof(_From));
3136  static_assert(is_trivially_copyable_v<_Tp> && is_trivially_copyable_v<_Up>);
3137  static_assert(is_trivially_copyable_v<_ToMember> && is_trivially_copyable_v<_FromMember>);
3138 #if __has_builtin(__builtin_bit_cast)
3139  return {__private_init, __builtin_bit_cast(_ToMember, __data(__x))};
3140 #else
3141  return {__private_init, __bit_cast<_ToMember>(__data(__x))};
3142 #endif
3143  }
3144 
3145 template <typename _To, typename _Up, typename _Abi>
3146  _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR
3147  _To
3148  simd_bit_cast(const simd_mask<_Up, _Abi>& __x)
3149  {
3150  using _From = simd_mask<_Up, _Abi>;
3151  static_assert(sizeof(_To) == sizeof(_From));
3152  static_assert(is_trivially_copyable_v<_From>);
3153  // _To can be simd<T, A>, specifically simd<T, fixed_size<N>> in which case _To is not trivially
3154  // copyable.
3155  if constexpr (is_simd_v<_To>)
3156  {
3157  using _Tp = typename _To::value_type;
3158  using _ToMember = typename _SimdTraits<_Tp, typename _To::abi_type>::_SimdMember;
3159  static_assert(is_trivially_copyable_v<_ToMember>);
3160 #if __has_builtin(__builtin_bit_cast)
3161  return {__private_init, __builtin_bit_cast(_ToMember, __x)};
3162 #else
3163  return {__private_init, __bit_cast<_ToMember>(__x)};
3164 #endif
3165  }
3166  else
3167  {
3168  static_assert(is_trivially_copyable_v<_To>);
3169 #if __has_builtin(__builtin_bit_cast)
3170  return __builtin_bit_cast(_To, __x);
3171 #else
3172  return __bit_cast<_To>(__x);
3173 #endif
3174  }
3175  }
3176 } // namespace __proposed
3177 
3178 // simd_cast {{{2
3179 template <typename _Tp, typename _Up, typename _Ap,
3180  typename _To = __value_type_or_identity_t<_Tp>>
3181  _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR auto
3182  simd_cast(const simd<_ValuePreserving<_Up, _To>, _Ap>& __x)
3183  -> decltype(static_simd_cast<_Tp>(__x))
3184  { return static_simd_cast<_Tp>(__x); }
3185 
3186 namespace __proposed {
3187 template <typename _Tp, typename _Up, typename _Ap,
3188  typename _To = __value_type_or_identity_t<_Tp>>
3189  _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR auto
3190  simd_cast(const simd_mask<_ValuePreserving<_Up, _To>, _Ap>& __x)
3191  -> decltype(static_simd_cast<_Tp>(__x))
3192  { return static_simd_cast<_Tp>(__x); }
3193 } // namespace __proposed
3194 
3195 // }}}2
3196 // resizing_simd_cast {{{
3197 namespace __proposed {
3198 /* Proposed spec:
3199 
3200 template <class T, class U, class Abi>
3201 T resizing_simd_cast(const simd<U, Abi>& x)
3202 
3203 p1 Constraints:
3204  - is_simd_v<T> is true and
3205  - T::value_type is the same type as U
3206 
3207 p2 Returns:
3208  A simd object with the i^th element initialized to x[i] for all i in the
3209  range of [0, min(T::size(), simd_size_v<U, Abi>)). If T::size() is larger
3210  than simd_size_v<U, Abi>, the remaining elements are value-initialized.
3211 
3212 template <class T, class U, class Abi>
3213 T resizing_simd_cast(const simd_mask<U, Abi>& x)
3214 
3215 p1 Constraints: is_simd_mask_v<T> is true
3216 
3217 p2 Returns:
3218  A simd_mask object with the i^th element initialized to x[i] for all i in
3219 the range of [0, min(T::size(), simd_size_v<U, Abi>)). If T::size() is larger
3220  than simd_size_v<U, Abi>, the remaining elements are initialized to false.
3221 
3222  */
3223 
3224 template <typename _Tp, typename _Up, typename _Ap>
3225  _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR enable_if_t<
3226  conjunction_v<is_simd<_Tp>, is_same<typename _Tp::value_type, _Up>>, _Tp>
3227  resizing_simd_cast(const simd<_Up, _Ap>& __x)
3228  {
3229  if constexpr (is_same_v<typename _Tp::abi_type, _Ap>)
3230  return __x;
3231  else if (__builtin_is_constant_evaluated())
3232  return _Tp([&](auto __i) constexpr {
3233  return __i < simd_size_v<_Up, _Ap> ? __x[__i] : _Up();
3234  });
3235  else if constexpr (simd_size_v<_Up, _Ap> == 1)
3236  {
3237  _Tp __r{};
3238  __r[0] = __x[0];
3239  return __r;
3240  }
3241  else if constexpr (_Tp::size() == 1)
3242  return __x[0];
3243  else if constexpr (sizeof(_Tp) == sizeof(__x)
3244  && !__is_fixed_size_abi_v<_Ap>)
3245  return {__private_init,
3246  __vector_bitcast<typename _Tp::value_type, _Tp::size()>(
3247  _Ap::_S_masked(__data(__x))._M_data)};
3248  else
3249  {
3250  _Tp __r{};
3251  __builtin_memcpy(&__data(__r), &__data(__x),
3252  sizeof(_Up)
3253  * std::min(_Tp::size(), simd_size_v<_Up, _Ap>));
3254  return __r;
3255  }
3256  }
3257 
3258 template <typename _Tp, typename _Up, typename _Ap>
3259  _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR
3260  enable_if_t<is_simd_mask_v<_Tp>, _Tp>
3261  resizing_simd_cast(const simd_mask<_Up, _Ap>& __x)
3262  {
3263  return {__private_init, _Tp::abi_type::_MaskImpl::template _S_convert<
3264  typename _Tp::simd_type::value_type>(__x)};
3265  }
3266 } // namespace __proposed
3267 
3268 // }}}
3269 // to_fixed_size {{{2
3270 template <typename _Tp, int _Np>
3271  _GLIBCXX_SIMD_INTRINSIC fixed_size_simd<_Tp, _Np>
3272  to_fixed_size(const fixed_size_simd<_Tp, _Np>& __x)
3273  { return __x; }
3274 
3275 template <typename _Tp, int _Np>
3276  _GLIBCXX_SIMD_INTRINSIC fixed_size_simd_mask<_Tp, _Np>
3277  to_fixed_size(const fixed_size_simd_mask<_Tp, _Np>& __x)
3278  { return __x; }
3279 
3280 template <typename _Tp, typename _Ap>
3281  _GLIBCXX_SIMD_INTRINSIC fixed_size_simd<_Tp, simd_size_v<_Tp, _Ap>>
3282  to_fixed_size(const simd<_Tp, _Ap>& __x)
3283  {
3284  using _Rp = fixed_size_simd<_Tp, simd_size_v<_Tp, _Ap>>;
3285  return _Rp([&__x](auto __i) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA { return __x[__i]; });
3286  }
3287 
3288 template <typename _Tp, typename _Ap>
3289  _GLIBCXX_SIMD_INTRINSIC fixed_size_simd_mask<_Tp, simd_size_v<_Tp, _Ap>>
3290  to_fixed_size(const simd_mask<_Tp, _Ap>& __x)
3291  {
3292  return {__private_init,
3293  [&](auto __i) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA { return __x[__i]; }};
3294  }
3295 
3296 // to_native {{{2
3297 template <typename _Tp, int _Np>
3298  _GLIBCXX_SIMD_INTRINSIC
3299  enable_if_t<(_Np == native_simd<_Tp>::size()), native_simd<_Tp>>
3300  to_native(const fixed_size_simd<_Tp, _Np>& __x)
3301  {
3302  alignas(memory_alignment_v<native_simd<_Tp>>) _Tp __mem[_Np];
3303  __x.copy_to(__mem, vector_aligned);
3304  return {__mem, vector_aligned};
3305  }
3306 
3307 template <typename _Tp, int _Np>
3308  _GLIBCXX_SIMD_INTRINSIC
3309  enable_if_t<(_Np == native_simd_mask<_Tp>::size()), native_simd_mask<_Tp>>
3310  to_native(const fixed_size_simd_mask<_Tp, _Np>& __x)
3311  {
3312  return native_simd_mask<_Tp>(
3313  __private_init,
3314  [&](auto __i) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA { return __x[__i]; });
3315  }
3316 
3317 // to_compatible {{{2
3318 template <typename _Tp, int _Np>
3319  _GLIBCXX_SIMD_INTRINSIC enable_if_t<(_Np == simd<_Tp>::size()), simd<_Tp>>
3320  to_compatible(const simd<_Tp, simd_abi::fixed_size<_Np>>& __x)
3321  {
3322  alignas(memory_alignment_v<simd<_Tp>>) _Tp __mem[_Np];
3323  __x.copy_to(__mem, vector_aligned);
3324  return {__mem, vector_aligned};
3325  }
3326 
3327 template <typename _Tp, int _Np>
3328  _GLIBCXX_SIMD_INTRINSIC
3329  enable_if_t<(_Np == simd_mask<_Tp>::size()), simd_mask<_Tp>>
3330  to_compatible(const simd_mask<_Tp, simd_abi::fixed_size<_Np>>& __x)
3331  {
3332  return simd_mask<_Tp>(
3333  __private_init,
3334  [&](auto __i) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA { return __x[__i]; });
3335  }
3336 
3337 // masked assignment [simd_mask.where] {{{1
3338 
3339 // where_expression {{{1
3340 // const_where_expression<M, T> {{{2
3341 template <typename _M, typename _Tp>
3342  class const_where_expression
3343  {
3344  using _V = _Tp;
3345  static_assert(is_same_v<_V, __remove_cvref_t<_Tp>>);
3346 
3347  struct _Wrapper { using value_type = _V; };
3348 
3349  protected:
3350  using _Impl = typename _V::_Impl;
3351 
3352  using value_type =
3353  typename conditional_t<is_arithmetic_v<_V>, _Wrapper, _V>::value_type;
3354 
3355  _GLIBCXX_SIMD_INTRINSIC friend const _M&
3356  __get_mask(const const_where_expression& __x)
3357  { return __x._M_k; }
3358 
3359  _GLIBCXX_SIMD_INTRINSIC friend const _Tp&
3360  __get_lvalue(const const_where_expression& __x)
3361  { return __x._M_value; }
3362 
3363  const _M& _M_k;
3364  _Tp& _M_value;
3365 
3366  public:
3367  const_where_expression(const const_where_expression&) = delete;
3368 
3369  const_where_expression& operator=(const const_where_expression&) = delete;
3370 
3371  _GLIBCXX_SIMD_INTRINSIC constexpr
3372  const_where_expression(const _M& __kk, const _Tp& dd)
3373  : _M_k(__kk), _M_value(const_cast<_Tp&>(dd)) {}
3374 
3375  _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR _V
3376  operator-() const&&
3377  {
3378  return {__private_init,
3379  _Impl::template _S_masked_unary<negate>(__data(_M_k),
3380  __data(_M_value))};
3381  }
3382 
3383  template <typename _Up, typename _Flags>
3384  [[nodiscard]] _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR _V
3385  copy_from(const _LoadStorePtr<_Up, value_type>* __mem, _IsSimdFlagType<_Flags>) const&&
3386  {
3387  return {__private_init,
3388  _Impl::_S_masked_load(__data(_M_value), __data(_M_k),
3389  _Flags::template _S_apply<_V>(__mem))};
3390  }
3391 
3392  template <typename _Up, typename _Flags>
3393  _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR void
3394  copy_to(_LoadStorePtr<_Up, value_type>* __mem, _IsSimdFlagType<_Flags>) const&&
3395  {
3396  _Impl::_S_masked_store(__data(_M_value),
3397  _Flags::template _S_apply<_V>(__mem),
3398  __data(_M_k));
3399  }
3400  };
3401 
3402 // const_where_expression<bool, T> {{{2
3403 template <typename _Tp>
3404  class const_where_expression<bool, _Tp>
3405  {
3406  using _M = bool;
3407  using _V = _Tp;
3408 
3409  static_assert(is_same_v<_V, __remove_cvref_t<_Tp>>);
3410 
3411  struct _Wrapper { using value_type = _V; };
3412 
3413  protected:
3414  using value_type
3415  = typename conditional_t<is_arithmetic_v<_V>, _Wrapper, _V>::value_type;
3416 
3417  _GLIBCXX_SIMD_INTRINSIC friend const _M&
3418  __get_mask(const const_where_expression& __x)
3419  { return __x._M_k; }
3420 
3421  _GLIBCXX_SIMD_INTRINSIC friend const _Tp&
3422  __get_lvalue(const const_where_expression& __x)
3423  { return __x._M_value; }
3424 
3425  const bool _M_k;
3426  _Tp& _M_value;
3427 
3428  public:
3429  const_where_expression(const const_where_expression&) = delete;
3430  const_where_expression& operator=(const const_where_expression&) = delete;
3431 
3432  _GLIBCXX_SIMD_INTRINSIC constexpr
3433  const_where_expression(const bool __kk, const _Tp& dd)
3434  : _M_k(__kk), _M_value(const_cast<_Tp&>(dd)) {}
3435 
3436  _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR _V
3437  operator-() const&&
3438  { return _M_k ? -_M_value : _M_value; }
3439 
3440  template <typename _Up, typename _Flags>
3441  [[nodiscard]] _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR _V
3442  copy_from(const _LoadStorePtr<_Up, value_type>* __mem, _IsSimdFlagType<_Flags>) const&&
3443  { return _M_k ? static_cast<_V>(__mem[0]) : _M_value; }
3444 
3445  template <typename _Up, typename _Flags>
3446  _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR void
3447  copy_to(_LoadStorePtr<_Up, value_type>* __mem, _IsSimdFlagType<_Flags>) const&&
3448  {
3449  if (_M_k)
3450  __mem[0] = _M_value;
3451  }
3452  };
3453 
3454 // where_expression<M, T> {{{2
3455 template <typename _M, typename _Tp>
3456  class where_expression : public const_where_expression<_M, _Tp>
3457  {
3458  using _Impl = typename const_where_expression<_M, _Tp>::_Impl;
3459 
3460  static_assert(!is_const<_Tp>::value,
3461  "where_expression may only be instantiated with __a non-const "
3462  "_Tp parameter");
3463 
3464  using typename const_where_expression<_M, _Tp>::value_type;
3465  using const_where_expression<_M, _Tp>::_M_k;
3466  using const_where_expression<_M, _Tp>::_M_value;
3467 
3468  static_assert(
3469  is_same<typename _M::abi_type, typename _Tp::abi_type>::value, "");
3470  static_assert(_M::size() == _Tp::size(), "");
3471 
3472  _GLIBCXX_SIMD_INTRINSIC friend constexpr _Tp&
3473  __get_lvalue(where_expression& __x)
3474  { return __x._M_value; }
3475 
3476  public:
3477  where_expression(const where_expression&) = delete;
3478  where_expression& operator=(const where_expression&) = delete;
3479 
3480  _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR
3481  where_expression(const _M& __kk, _Tp& dd)
3482  : const_where_expression<_M, _Tp>(__kk, dd) {}
3483 
3484  template <typename _Up>
3485  _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR void
3486  operator=(_Up&& __x) &&
3487  {
3488  _Impl::_S_masked_assign(__data(_M_k), __data(_M_value),
3489  __to_value_type_or_member_type<_Tp>(
3490  static_cast<_Up&&>(__x)));
3491  }
3492 
3493 #define _GLIBCXX_SIMD_OP_(__op, __name) \
3494  template <typename _Up> \
3495  _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR void \
3496  operator __op##=(_Up&& __x)&& \
3497  { \
3498  _Impl::template _S_masked_cassign( \
3499  __data(_M_k), __data(_M_value), \
3500  __to_value_type_or_member_type<_Tp>(static_cast<_Up&&>(__x)), \
3501  [](auto __impl, auto __lhs, auto __rhs) \
3502  constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA \
3503  { return __impl.__name(__lhs, __rhs); }); \
3504  } \
3505  static_assert(true)
3506  _GLIBCXX_SIMD_OP_(+, _S_plus);
3507  _GLIBCXX_SIMD_OP_(-, _S_minus);
3508  _GLIBCXX_SIMD_OP_(*, _S_multiplies);
3509  _GLIBCXX_SIMD_OP_(/, _S_divides);
3510  _GLIBCXX_SIMD_OP_(%, _S_modulus);
3511  _GLIBCXX_SIMD_OP_(&, _S_bit_and);
3512  _GLIBCXX_SIMD_OP_(|, _S_bit_or);
3513  _GLIBCXX_SIMD_OP_(^, _S_bit_xor);
3514  _GLIBCXX_SIMD_OP_(<<, _S_shift_left);
3515  _GLIBCXX_SIMD_OP_(>>, _S_shift_right);
3516 #undef _GLIBCXX_SIMD_OP_
3517 
3518  _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR void
3519  operator++() &&
3520  {
3521  __data(_M_value)
3522  = _Impl::template _S_masked_unary<__increment>(__data(_M_k), __data(_M_value));
3523  }
3524 
3525  _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR void
3526  operator++(int) &&
3527  {
3528  __data(_M_value)
3529  = _Impl::template _S_masked_unary<__increment>(__data(_M_k), __data(_M_value));
3530  }
3531 
3532  _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR void
3533  operator--() &&
3534  {
3535  __data(_M_value)
3536  = _Impl::template _S_masked_unary<__decrement>(__data(_M_k), __data(_M_value));
3537  }
3538 
3539  _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR void
3540  operator--(int) &&
3541  {
3542  __data(_M_value)
3543  = _Impl::template _S_masked_unary<__decrement>(__data(_M_k), __data(_M_value));
3544  }
3545 
3546  // intentionally hides const_where_expression::copy_from
3547  template <typename _Up, typename _Flags>
3548  _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR void
3549  copy_from(const _LoadStorePtr<_Up, value_type>* __mem, _IsSimdFlagType<_Flags>) &&
3550  {
3551  __data(_M_value) = _Impl::_S_masked_load(__data(_M_value), __data(_M_k),
3552  _Flags::template _S_apply<_Tp>(__mem));
3553  }
3554  };
3555 
3556 // where_expression<bool, T> {{{2
3557 template <typename _Tp>
3558  class where_expression<bool, _Tp>
3559  : public const_where_expression<bool, _Tp>
3560  {
3561  using _M = bool;
3562  using typename const_where_expression<_M, _Tp>::value_type;
3563  using const_where_expression<_M, _Tp>::_M_k;
3564  using const_where_expression<_M, _Tp>::_M_value;
3565 
3566  public:
3567  where_expression(const where_expression&) = delete;
3568  where_expression& operator=(const where_expression&) = delete;
3569 
3570  _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR
3571  where_expression(const _M& __kk, _Tp& dd)
3572  : const_where_expression<_M, _Tp>(__kk, dd) {}
3573 
3574 #define _GLIBCXX_SIMD_OP_(__op) \
3575  template <typename _Up> \
3576  _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR void \
3577  operator __op(_Up&& __x)&& \
3578  { if (_M_k) _M_value __op static_cast<_Up&&>(__x); }
3579 
3580  _GLIBCXX_SIMD_OP_(=)
3581  _GLIBCXX_SIMD_OP_(+=)
3582  _GLIBCXX_SIMD_OP_(-=)
3583  _GLIBCXX_SIMD_OP_(*=)
3584  _GLIBCXX_SIMD_OP_(/=)
3585  _GLIBCXX_SIMD_OP_(%=)
3586  _GLIBCXX_SIMD_OP_(&=)
3587  _GLIBCXX_SIMD_OP_(|=)
3588  _GLIBCXX_SIMD_OP_(^=)
3589  _GLIBCXX_SIMD_OP_(<<=)
3590  _GLIBCXX_SIMD_OP_(>>=)
3591  #undef _GLIBCXX_SIMD_OP_
3592 
3593  _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR void
3594  operator++() &&
3595  { if (_M_k) ++_M_value; }
3596 
3597  _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR void
3598  operator++(int) &&
3599  { if (_M_k) ++_M_value; }
3600 
3601  _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR void
3602  operator--() &&
3603  { if (_M_k) --_M_value; }
3604 
3605  _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR void
3606  operator--(int) &&
3607  { if (_M_k) --_M_value; }
3608 
3609  // intentionally hides const_where_expression::copy_from
3610  template <typename _Up, typename _Flags>
3611  _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR void
3612  copy_from(const _LoadStorePtr<_Up, value_type>* __mem, _IsSimdFlagType<_Flags>) &&
3613  { if (_M_k) _M_value = __mem[0]; }
3614  };
3615 
3616 // where {{{1
3617 template <typename _Tp, typename _Ap>
3618  _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR
3619  where_expression<simd_mask<_Tp, _Ap>, simd<_Tp, _Ap>>
3620  where(const typename simd<_Tp, _Ap>::mask_type& __k, simd<_Tp, _Ap>& __value)
3621  { return {__k, __value}; }
3622 
3623 template <typename _Tp, typename _Ap>
3624  _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR
3625  const_where_expression<simd_mask<_Tp, _Ap>, simd<_Tp, _Ap>>
3626  where(const typename simd<_Tp, _Ap>::mask_type& __k, const simd<_Tp, _Ap>& __value)
3627  { return {__k, __value}; }
3628 
3629 template <typename _Tp, typename _Ap>
3630  _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR
3631  where_expression<simd_mask<_Tp, _Ap>, simd_mask<_Tp, _Ap>>
3632  where(const remove_const_t<simd_mask<_Tp, _Ap>>& __k, simd_mask<_Tp, _Ap>& __value)
3633  { return {__k, __value}; }
3634 
3635 template <typename _Tp, typename _Ap>
3636  _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR
3637  const_where_expression<simd_mask<_Tp, _Ap>, simd_mask<_Tp, _Ap>>
3638  where(const remove_const_t<simd_mask<_Tp, _Ap>>& __k, const simd_mask<_Tp, _Ap>& __value)
3639  { return {__k, __value}; }
3640 
3641 template <typename _Tp>
3642  _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR where_expression<bool, _Tp>
3643  where(_ExactBool __k, _Tp& __value)
3644  { return {__k, __value}; }
3645 
3646 template <typename _Tp>
3647  _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR const_where_expression<bool, _Tp>
3648  where(_ExactBool __k, const _Tp& __value)
3649  { return {__k, __value}; }
3650 
3651 template <typename _Tp, typename _Ap>
3652  _GLIBCXX_SIMD_CONSTEXPR void
3653  where(bool __k, simd<_Tp, _Ap>& __value) = delete;
3654 
3655 template <typename _Tp, typename _Ap>
3656  _GLIBCXX_SIMD_CONSTEXPR void
3657  where(bool __k, const simd<_Tp, _Ap>& __value) = delete;
3658 
3659 // proposed mask iterations {{{1
3660 namespace __proposed {
3661 template <size_t _Np>
3662  class where_range
3663  {
3664  const bitset<_Np> __bits;
3665 
3666  public:
3667  where_range(bitset<_Np> __b) : __bits(__b) {}
3668 
3669  class iterator
3670  {
3671  size_t __mask;
3672  size_t __bit;
3673 
3674  _GLIBCXX_SIMD_INTRINSIC void
3675  __next_bit()
3676  { __bit = __builtin_ctzl(__mask); }
3677 
3678  _GLIBCXX_SIMD_INTRINSIC void
3679  __reset_lsb()
3680  {
3681  // 01100100 - 1 = 01100011
3682  __mask &= (__mask - 1);
3683  // __asm__("btr %1,%0" : "+r"(__mask) : "r"(__bit));
3684  }
3685 
3686  public:
3687  iterator(decltype(__mask) __m) : __mask(__m) { __next_bit(); }
3688  iterator(const iterator&) = default;
3689  iterator(iterator&&) = default;
3690 
3691  _GLIBCXX_SIMD_ALWAYS_INLINE size_t
3692  operator->() const
3693  { return __bit; }
3694 
3695  _GLIBCXX_SIMD_ALWAYS_INLINE size_t
3696  operator*() const
3697  { return __bit; }
3698 
3699  _GLIBCXX_SIMD_ALWAYS_INLINE iterator&
3700  operator++()
3701  {
3702  __reset_lsb();
3703  __next_bit();
3704  return *this;
3705  }
3706 
3707  _GLIBCXX_SIMD_ALWAYS_INLINE iterator
3708  operator++(int)
3709  {
3710  iterator __tmp = *this;
3711  __reset_lsb();
3712  __next_bit();
3713  return __tmp;
3714  }
3715 
3716  _GLIBCXX_SIMD_ALWAYS_INLINE bool
3717  operator==(const iterator& __rhs) const
3718  { return __mask == __rhs.__mask; }
3719 
3720  _GLIBCXX_SIMD_ALWAYS_INLINE bool
3721  operator!=(const iterator& __rhs) const
3722  { return __mask != __rhs.__mask; }
3723  };
3724 
3725  iterator
3726  begin() const
3727  { return __bits.to_ullong(); }
3728 
3729  iterator
3730  end() const
3731  { return 0; }
3732  };
3733 
3734 template <typename _Tp, typename _Ap>
3735  where_range<simd_size_v<_Tp, _Ap>>
3736  where(const simd_mask<_Tp, _Ap>& __k)
3737  { return __k.__to_bitset(); }
3738 
3739 } // namespace __proposed
3740 
3741 // }}}1
3742 // reductions [simd.reductions] {{{1
3743 template <typename _Tp, typename _Abi, typename _BinaryOperation = plus<>>
3744  _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR _Tp
3745  reduce(const simd<_Tp, _Abi>& __v, _BinaryOperation __binary_op = _BinaryOperation())
3746  { return _Abi::_SimdImpl::_S_reduce(__v, __binary_op); }
3747 
3748 template <typename _M, typename _V, typename _BinaryOperation = plus<>>
3749  _GLIBCXX_SIMD_INTRINSIC typename _V::value_type
3750  reduce(const const_where_expression<_M, _V>& __x,
3751  typename _V::value_type __identity_element, _BinaryOperation __binary_op)
3752  {
3753  if (__builtin_expect(none_of(__get_mask(__x)), false))
3754  return __identity_element;
3755 
3756  _V __tmp = __identity_element;
3757  _V::_Impl::_S_masked_assign(__data(__get_mask(__x)), __data(__tmp),
3758  __data(__get_lvalue(__x)));
3759  return reduce(__tmp, __binary_op);
3760  }
3761 
3762 template <typename _M, typename _V>
3763  _GLIBCXX_SIMD_INTRINSIC typename _V::value_type
3764  reduce(const const_where_expression<_M, _V>& __x, plus<> __binary_op = {})
3765  { return reduce(__x, 0, __binary_op); }
3766 
3767 template <typename _M, typename _V>
3768  _GLIBCXX_SIMD_INTRINSIC typename _V::value_type
3769  reduce(const const_where_expression<_M, _V>& __x, multiplies<> __binary_op)
3770  { return reduce(__x, 1, __binary_op); }
3771 
3772 template <typename _M, typename _V>
3773  _GLIBCXX_SIMD_INTRINSIC typename _V::value_type
3774  reduce(const const_where_expression<_M, _V>& __x, bit_and<> __binary_op)
3775  { return reduce(__x, ~typename _V::value_type(), __binary_op); }
3776 
3777 template <typename _M, typename _V>
3778  _GLIBCXX_SIMD_INTRINSIC typename _V::value_type
3779  reduce(const const_where_expression<_M, _V>& __x, bit_or<> __binary_op)
3780  { return reduce(__x, 0, __binary_op); }
3781 
3782 template <typename _M, typename _V>
3783  _GLIBCXX_SIMD_INTRINSIC typename _V::value_type
3784  reduce(const const_where_expression<_M, _V>& __x, bit_xor<> __binary_op)
3785  { return reduce(__x, 0, __binary_op); }
3786 
3787 template <typename _Tp, typename _Abi>
3788  _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR _Tp
3789  hmin(const simd<_Tp, _Abi>& __v) noexcept
3790  { return _Abi::_SimdImpl::_S_reduce(__v, __detail::_Minimum()); }
3791 
3792 template <typename _Tp, typename _Abi>
3793  _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR _Tp
3794  hmax(const simd<_Tp, _Abi>& __v) noexcept
3795  { return _Abi::_SimdImpl::_S_reduce(__v, __detail::_Maximum()); }
3796 
3797 template <typename _M, typename _V>
3798  _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR
3799  typename _V::value_type
3800  hmin(const const_where_expression<_M, _V>& __x) noexcept
3801  {
3802  using _Tp = typename _V::value_type;
3803  constexpr _Tp __id_elem =
3804 #ifdef __FINITE_MATH_ONLY__
3805  __finite_max_v<_Tp>;
3806 #else
3807  __value_or<__infinity, _Tp>(__finite_max_v<_Tp>);
3808 #endif
3809  _V __tmp = __id_elem;
3810  _V::_Impl::_S_masked_assign(__data(__get_mask(__x)), __data(__tmp),
3811  __data(__get_lvalue(__x)));
3812  return _V::abi_type::_SimdImpl::_S_reduce(__tmp, __detail::_Minimum());
3813  }
3814 
// Horizontal maximum over a where-expression. A temporary vector is filled
// with a neutral "identity" value, the where-expression's operand is
// masked-assigned into it, and the temporary is then reduced with
// __detail::_Maximum.
3815 template <typename _M, typename _V>
3816  _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR
3817  typename _V::value_type
3818  hmax(const const_where_expression<_M, _V>& __x) noexcept
3819  {
3820  using _Tp = typename _V::value_type;
 // Identity element for the maximum: __finite_min_v, or (in the #else
 // branch) negative infinity when _Tp has an infinity.
 // NOTE(review): GCC appears to predefine __FINITE_MATH_ONLY__ (as 0 or 1)
 // unconditionally, which would make the #else branch unreachable. Confirm
 // whether `#if __FINITE_MATH_ONLY__` was intended before changing it, since
 // that alters the value returned when no element is selected.
3821  constexpr _Tp __id_elem =
3822 #ifdef __FINITE_MATH_ONLY__
3823  __finite_min_v<_Tp>;
3824 #else
3825  [] {
3826  if constexpr (__value_exists_v<__infinity, _Tp>)
3827  return -__infinity_v<_Tp>;
3828  else
3829  return __finite_min_v<_Tp>;
3830  }();
3831 #endif
 // Broadcast the identity, then (presumably) overwrite only the lanes
 // selected by the mask with the operand's elements.
3832  _V __tmp = __id_elem;
3833  _V::_Impl::_S_masked_assign(__data(__get_mask(__x)), __data(__tmp),
3834  __data(__get_lvalue(__x)));
3835  return _V::abi_type::_SimdImpl::_S_reduce(__tmp, __detail::_Maximum());
3836  }
3837 
3838 // }}}1
3839 // algorithms [simd.alg] {{{
3840 template <typename _Tp, typename _Ap>
3841  _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR simd<_Tp, _Ap>
3842  min(const simd<_Tp, _Ap>& __a, const simd<_Tp, _Ap>& __b)
3843  { return {__private_init, _Ap::_SimdImpl::_S_min(__data(__a), __data(__b))}; }
3844 
3845 template <typename _Tp, typename _Ap>
3846  _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR simd<_Tp, _Ap>
3847  max(const simd<_Tp, _Ap>& __a, const simd<_Tp, _Ap>& __b)
3848  { return {__private_init, _Ap::_SimdImpl::_S_max(__data(__a), __data(__b))}; }
3849 
3850 template <typename _Tp, typename _Ap>
3851  _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR
3852  pair<simd<_Tp, _Ap>, simd<_Tp, _Ap>>
3853  minmax(const simd<_Tp, _Ap>& __a, const simd<_Tp, _Ap>& __b)
3854  {
3855  const auto pair_of_members
3856  = _Ap::_SimdImpl::_S_minmax(__data(__a), __data(__b));
3857  return {simd<_Tp, _Ap>(__private_init, pair_of_members.first),
3858  simd<_Tp, _Ap>(__private_init, pair_of_members.second)};
3859  }
3860 
3861 template <typename _Tp, typename _Ap>
3862  _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR simd<_Tp, _Ap>
3863  clamp(const simd<_Tp, _Ap>& __v, const simd<_Tp, _Ap>& __lo, const simd<_Tp, _Ap>& __hi)
3864  {
3865  using _Impl = typename _Ap::_SimdImpl;
3866  return {__private_init,
3867  _Impl::_S_min(__data(__hi),
3868  _Impl::_S_max(__data(__lo), __data(__v)))};
3869  }
3870 
3871 // }}}
3872 
// Forward declaration of split<_Sizes...>(simd). The defaulted template
// parameter restricts the overload to size lists whose sum equals the size
// of the argument. The result holds one simd per requested size, each using
// the ABI that simd_abi::deduce_t selects for that size.
3873 template <size_t... _Sizes, typename _Tp, typename _Ap,
3874  typename = enable_if_t<((_Sizes + ...) == simd<_Tp, _Ap>::size())>>
3875  inline tuple<simd<_Tp, simd_abi::deduce_t<_Tp, _Sizes>>...>
3876  split(const simd<_Tp, _Ap>&);
3877 
3878 // __extract_part {{{
// Declaration only (the definition is not in this part of the file).
// The result holds _Np / _Total * _Combine elements of __x.
// NOTE(review): presumably __x is viewed as _Total equal parts and the result
// is _Combine consecutive parts starting at part _Index; confirm at the
// definition.
3879 template <int _Index, int _Total, int _Combine = 1, typename _Tp, size_t _Np>
3880  _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_CONST constexpr
3881  _SimdWrapper<_Tp, _Np / _Total * _Combine>
3882  __extract_part(const _SimdWrapper<_Tp, _Np> __x);
3883 
// Declaration only: the _SimdTuple counterpart of __extract_part. The return
// type is deduced from the definition, which is not in this part of the file.
3884 template <int _Index, int _Parts, int _Combine = 1, typename _Tp, typename _A0, typename... _As>
3885  _GLIBCXX_SIMD_INTRINSIC constexpr auto
3886  __extract_part(const _SimdTuple<_Tp, _A0, _As...>& __x);
3887 
3888 // }}}
3889 // _SizeList {{{
3890 template <size_t _V0, size_t... _Values>
3891  struct _SizeList
3892  {
3893  template <size_t _I>
3894  static constexpr size_t
3895  _S_at(_SizeConstant<_I> = {})
3896  {
3897  if constexpr (_I == 0)
3898  return _V0;
3899  else
3900  return _SizeList<_Values...>::template _S_at<_I - 1>();
3901  }
3902 
3903  template <size_t _I>
3904  static constexpr auto
3905  _S_before(_SizeConstant<_I> = {})
3906  {
3907  if constexpr (_I == 0)
3908  return _SizeConstant<0>();
3909  else
3910  return _SizeConstant<
3911  _V0 + _SizeList<_Values...>::template _S_before<_I - 1>()>();
3912  }
3913 
3914  template <size_t _Np>
3915  static constexpr auto
3916  _S_pop_front(_SizeConstant<_Np> = {})
3917  {
3918  if constexpr (_Np == 0)
3919  return _SizeList();
3920  else
3921  return _SizeList<_Values...>::template _S_pop_front<_Np - 1>();
3922  }
3923  };
3924 
3925 // }}}
3926 // __extract_center {{{
// Returns the middle half of __x: viewing __x as four equal quarters
// x0 x1 x2 x3, the result is {x1, x2}. _Np must be a multiple of 4.
3927 template <typename _Tp, size_t _Np>
3928  _GLIBCXX_SIMD_INTRINSIC _SimdWrapper<_Tp, _Np / 2>
3929  __extract_center(_SimdWrapper<_Tp, _Np> __x)
3930  {
3931  static_assert(_Np >= 4);
3932  static_assert(_Np % 4 == 0); // x0 - x1 - x2 - x3 -> return {x1, x2}
3933 #if _GLIBCXX_SIMD_X86INTRIN // {{{
 // 64-byte vectors with AVX-512F: the shuffle immediate encodes the 128-bit
 // lane selection 1, 2, 2, 3, so lanes 1 and 2 end up in the low 256 bits,
 // which the 512->256 cast keeps.
3934  if constexpr (__have_avx512f && sizeof(_Tp) * _Np == 64)
3935  {
3936  const auto __intrin = __to_intrin(__x);
3937  if constexpr (is_integral_v<_Tp>)
3938  return __vector_bitcast<_Tp>(_mm512_castsi512_si256(
3939  _mm512_shuffle_i32x4(__intrin, __intrin,
3940  1 + 2 * 0x4 + 2 * 0x10 + 3 * 0x40)));
3941  else if constexpr (sizeof(_Tp) == 4)
3942  return __vector_bitcast<_Tp>(_mm512_castps512_ps256(
3943  _mm512_shuffle_f32x4(__intrin, __intrin,
3944  1 + 2 * 0x4 + 2 * 0x10 + 3 * 0x40)));
3945  else if constexpr (sizeof(_Tp) == 8)
3946  return __vector_bitcast<_Tp>(_mm512_castpd512_pd256(
3947  _mm512_shuffle_f64x2(__intrin, __intrin,
3948  1 + 2 * 0x4 + 2 * 0x10 + 3 * 0x40)));
3949  else
3950  __assert_unreachable<_Tp>();
3951  }
 // 32-byte floating-point vectors: treated as four doubles; the shuffle
 // picks the upper double of the low half and the lower double of the high
 // half.
3952  else if constexpr (sizeof(_Tp) * _Np == 32 && is_floating_point_v<_Tp>)
3953  return __vector_bitcast<_Tp>(
3954  _mm_shuffle_pd(__lo128(__vector_bitcast<double>(__x)),
3955  __hi128(__vector_bitcast<double>(__x)), 1));
 // Other 32-byte storage (possibly only partially used): byte-align the
 // hi:lo pair right by a quarter of the used bytes.
3956  else if constexpr (sizeof(__x) == 32 && sizeof(_Tp) * _Np <= 32)
3957  return __vector_bitcast<_Tp>(
3958  _mm_alignr_epi8(__hi128(__vector_bitcast<_LLong>(__x)),
3959  __lo128(__vector_bitcast<_LLong>(__x)),
3960  sizeof(_Tp) * _Np / 4));
3961  else
3962 #endif // _GLIBCXX_SIMD_X86INTRIN }}}
 // Generic fallback: copy _Np / 2 elements, starting a quarter of the way
 // into __x.
3963  {
3964  __vector_type_t<_Tp, _Np / 2> __r;
3965  __builtin_memcpy(&__r,
3966  reinterpret_cast<const char*>(&__x)
3967  + sizeof(_Tp) * _Np / 4,
3968  sizeof(_Tp) * _Np / 2);
3969  return __r;
3970  }
3971  }
3972 
3973 template <typename _Tp, typename _A0, typename... _As>
3974  _GLIBCXX_SIMD_INTRINSIC
3975  _SimdWrapper<_Tp, _SimdTuple<_Tp, _A0, _As...>::_S_size() / 2>
3976  __extract_center(const _SimdTuple<_Tp, _A0, _As...>& __x)
3977  {
3978  if constexpr (sizeof...(_As) == 0)
3979  return __extract_center(__x.first);
3980  else
3981  return __extract_part<1, 4, 2>(__x);
3982  }
3983 
3984 // }}}
3985 // __split_wrapper {{{
3986 template <size_t... _Sizes, typename _Tp, typename... _As>
3987  auto
3988  __split_wrapper(_SizeList<_Sizes...>, const _SimdTuple<_Tp, _As...>& __x)
3989  {
3990  return split<_Sizes...>(
3991  fixed_size_simd<_Tp, _SimdTuple<_Tp, _As...>::_S_size()>(__private_init,
3992  __x));
3993  }
3994 
3995 // }}}
3996 
3997 // split<simd>(simd) {{{
// Splits __x into _Parts objects of type _V, returned as an array. The
// overload only participates when _V is a simd and the size of __x is an
// exact multiple of _V::size(). Which strategy is used depends on the ABIs
// involved; see the per-branch comments.
3998 template <typename _V, typename _Ap,
3999  size_t _Parts = simd_size_v<typename _V::value_type, _Ap> / _V::size()>
4000  enable_if_t<simd_size_v<typename _V::value_type, _Ap> == _Parts * _V::size()
4001  && is_simd_v<_V>, array<_V, _Parts>>
4002  split(const simd<typename _V::value_type, _Ap>& __x)
4003  {
4004  using _Tp = typename _V::value_type;
 // Single part: nothing to split, just convert to _V.
4005  if constexpr (_Parts == 1)
4006  {
4007  return {simd_cast<_V>(__x)};
4008  }
 // Runtime branch (not if-constexpr): when __x reports being a
 // constant-propagated value, build every part element by element.
4009  else if (__x._M_is_constprop())
4010  {
4011  return __generate_from_n_evaluations<_Parts, array<_V, _Parts>>(
4012  [&](auto __i) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
4013  return _V([&](auto __j) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA
4014  { return __x[__i * _V::size() + __j]; });
4015  });
4016  }
4017  else if constexpr (
4018  __is_fixed_size_abi_v<_Ap>
4019  && (is_same_v<typename _V::abi_type, simd_abi::scalar>
4020  || (__is_fixed_size_abi_v<typename _V::abi_type>
4021  && sizeof(_V) == sizeof(_Tp) * _V::size() // _V doesn't have padding
4022  )))
4023  {
4024  // fixed_size -> fixed_size (w/o padding) or scalar
4025 #ifdef _GLIBCXX_SIMD_USE_ALIASING_LOADS
 // Load each part directly from the storage of __x through a
 // may_alias pointer.
4026  const __may_alias<_Tp>* const __element_ptr
4027  = reinterpret_cast<const __may_alias<_Tp>*>(&__data(__x));
4028  return __generate_from_n_evaluations<_Parts, array<_V, _Parts>>(
4029  [&](auto __i) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA
4030  { return _V(__element_ptr + __i * _V::size(), vector_aligned); });
4031 #else
 // Without aliasing loads: subscript the data member with compile-time
 // indices (__offset + __j).
4032  const auto& __xx = __data(__x);
4033  return __generate_from_n_evaluations<_Parts, array<_V, _Parts>>(
4034  [&](auto __i) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
4035  [[maybe_unused]] constexpr size_t __offset
4036  = decltype(__i)::value * _V::size();
4037  return _V([&](auto __j) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
4038  constexpr _SizeConstant<__j + __offset> __k;
4039  return __xx[__k];
4040  });
4041  });
4042 #endif
4043  }
 // Non-fixed_size source split into scalars: one element per part.
4044  else if constexpr (is_same_v<typename _V::abi_type, simd_abi::scalar>)
4045  {
4046  // normally memcpy should work here as well
4047  return __generate_from_n_evaluations<_Parts, array<_V, _Parts>>(
4048  [&](auto __i) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA { return __x[__i]; });
4049  }
 // Remaining cases: fixed_size targets are generated element by element,
 // all other targets take the matching part via __extract_part.
4050  else
4051  {
4052  return __generate_from_n_evaluations<_Parts, array<_V, _Parts>>(
4053  [&](auto __i) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
4054  if constexpr (__is_fixed_size_abi_v<typename _V::abi_type>)
4055  return _V([&](auto __j) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
4056  return __x[__i * _V::size() + __j];
4057  });
4058  else
4059  return _V(__private_init,
4060  __extract_part<decltype(__i)::value, _Parts>(__data(__x)));
4061  });
4062  }
4063  }
4064 
4065 // }}}
4066 // split<simd_mask>(simd_mask) {{{
// Splits the mask __x into _Parts masks of type _V, returned as an array.
// The overload only participates when _V is a simd_mask and the size of __x
// is an exact multiple of _V::size().
4067 template <typename _V, typename _Ap,
4068  size_t _Parts = simd_size_v<typename _V::simd_type::value_type, _Ap> / _V::size()>
4069  enable_if_t<is_simd_mask_v<_V> && simd_size_v<typename
4070  _V::simd_type::value_type, _Ap> == _Parts * _V::size(), array<_V, _Parts>>
4071  split(const simd_mask<typename _V::simd_type::value_type, _Ap>& __x)
4072  {
 // Source and target ABI are the same: return __x unchanged.
4073  if constexpr (is_same_v<_Ap, typename _V::abi_type>)
4074  return {__x};
 // One part with a different ABI: convert.
4075  else if constexpr (_Parts == 1)
4076  return {__proposed::static_simd_cast<_V>(__x)};
 // AVX mask into two SSE masks: take the low and high 128 bits.
4077  else if constexpr (_Parts == 2 && __is_sse_abi<typename _V::abi_type>()
4078  && __is_avx_abi<_Ap>())
4079  return {_V(__private_init, __lo128(__data(__x))),
4080  _V(__private_init, __hi128(__data(__x)))};
 // Each part fits into an unsigned long long: go through a bitset, shifting
 // the bits of part __i down to position 0.
4081  else if constexpr (_V::size() <= __CHAR_BIT__ * sizeof(_ULLong))
4082  {
4083  const bitset __bits = __x.__to_bitset();
4084  return __generate_from_n_evaluations<_Parts, array<_V, _Parts>>(
4085  [&](auto __i) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
4086  constexpr size_t __offset = __i * _V::size();
4087  return _V(__bitset_init, (__bits >> __offset).to_ullong());
4088  });
4089  }
 // Otherwise build every part element by element.
4090  else
4091  {
4092  return __generate_from_n_evaluations<_Parts, array<_V, _Parts>>(
4093  [&](auto __i) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
4094  constexpr size_t __offset = __i * _V::size();
4095  return _V(__private_init,
4096  [&](auto __j) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
4097  return __x[__j + __offset];
4098  });
4099  });
4100  }
4101  }
4102 
4103 // }}}
4104 // split<_Sizes...>(simd) {{{
4105 template <size_t... _Sizes, typename _Tp, typename _Ap, typename>
4106  _GLIBCXX_SIMD_ALWAYS_INLINE
4107  tuple<simd<_Tp, simd_abi::deduce_t<_Tp, _Sizes>>...>
4108  split(const simd<_Tp, _Ap>& __x)
4109  {
4110  using _SL = _SizeList<_Sizes...>;
4111  using _Tuple = tuple<__deduced_simd<_Tp, _Sizes>...>;
4112  constexpr size_t _Np = simd_size_v<_Tp, _Ap>;
4113  constexpr size_t _N0 = _SL::template _S_at<0>();
4114  using _V = __deduced_simd<_Tp, _N0>;
4115 
4116  if (__x._M_is_constprop())
4117  return __generate_from_n_evaluations<sizeof...(_Sizes), _Tuple>(
4118  [&](auto __i) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
4119  using _Vi = __deduced_simd<_Tp, _SL::_S_at(__i)>;
4120  constexpr size_t __offset = _SL::_S_before(__i);
4121  return _Vi([&](auto __j) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
4122  return __x[__offset + __j];
4123  });
4124  });
4125  else if constexpr (_Np == _N0)
4126  {
4127  static_assert(sizeof...(_Sizes) == 1);
4128  return {simd_cast<_V>(__x)};
4129  }
4130  else if constexpr // split from fixed_size, such that __x::first.size == _N0
4131  (__is_fixed_size_abi_v<
4132  _Ap> && __fixed_size_storage_t<_Tp, _Np>::_S_first_size == _N0)
4133  {
4134  static_assert(
4135  !__is_fixed_size_abi_v<typename _V::abi_type>,
4136  "How can <_Tp, _Np> be __a single _SimdTuple entry but __a "
4137  "fixed_size_simd "
4138  "when deduced?");
4139  // extract first and recurse (__split_wrapper is needed to deduce a new
4140  // _Sizes pack)
4141  return tuple_cat(make_tuple(_V(__private_init, __data(__x).first)),
4142  __split_wrapper(_SL::template _S_pop_front<1>(),
4143  __data(__x).second));
4144  }
4145  else if constexpr ((!is_same_v<simd_abi::scalar,
4146  simd_abi::deduce_t<_Tp, _Sizes>> && ...)
4147  && (!__is_fixed_size_abi_v<
4148  simd_abi::deduce_t<_Tp, _Sizes>> && ...))
4149  {
4150  if constexpr (((_Sizes * 2 == _Np) && ...))
4151  return {{__private_init, __extract_part<0, 2>(__data(__x))},
4152  {__private_init, __extract_part<1, 2>(__data(__x))}};
4153  else if constexpr (is_same_v<_SizeList<_Sizes...>,
4154  _SizeList<_Np / 3, _Np / 3, _Np / 3>>)
4155  return {{__private_init, __extract_part<0, 3>(__data(__x))},
4156  {__private_init, __extract_part<1, 3>(__data(__x))},
4157  {__private_init, __extract_part<2, 3>(__data(__x))}};
4158  else if constexpr (is_same_v<_SizeList<_Sizes...>,
4159  _SizeList<2 * _Np / 3, _Np / 3>>)
4160  return {{__private_init, __extract_part<0, 3, 2>(__data(__x))},
4161  {__private_init, __extract_part<2, 3>(__data(__x))}};
4162  else if constexpr (is_same_v<_SizeList<_Sizes...>,
4163  _SizeList<_Np / 3, 2 * _Np / 3>>)
4164  return {{__private_init, __extract_part<0, 3>(__data(__x))},
4165  {__private_init, __extract_part<1, 3, 2>(__data(__x))}};
4166  else if constexpr (is_same_v<_SizeList<_Sizes...>,
4167  _SizeList<_Np / 2, _Np / 4, _Np / 4>>)
4168  return {{__private_init, __extract_part<0, 2>(__data(__x))},
4169  {__private_init, __extract_part<2, 4>(__data(__x))},
4170  {__private_init, __extract_part<3, 4>(__data(__x))}};
4171  else if constexpr (is_same_v<_SizeList<_Sizes...>,
4172  _SizeList<_Np / 4, _Np / 4, _Np / 2>>)
4173  return {{__private_init, __extract_part<0, 4>(__data(__x))},
4174  {__private_init, __extract_part<1, 4>(__data(__x))},
4175  {__private_init, __extract_part<1, 2>(__data(__x))}};
4176  else if constexpr (is_same_v<_SizeList<_Sizes...>,
4177  _SizeList<_Np / 4, _Np / 2, _Np / 4>>)
4178  return {{__private_init, __extract_part<0, 4>(__data(__x))},
4179  {__private_init, __extract_center(__data(__x))},
4180  {__private_init, __extract_part<3, 4>(__data(__x))}};
4181  else if constexpr (((_Sizes * 4 == _Np) && ...))
4182  return {{__private_init, __extract_part<0, 4>(__data(__x))},
4183  {__private_init, __extract_part<1, 4>(__data(__x))},
4184  {__private_init, __extract_part<2, 4>(__data(__x))},
4185  {__private_init, __extract_part<3, 4>(__data(__x))}};
4186  // else fall through
4187  }
4188 #ifdef _GLIBCXX_SIMD_USE_ALIASING_LOADS
4189  const __may_alias<_Tp>* const __element_ptr
4190  = reinterpret_cast<const __may_alias<_Tp>*>(&__x);
4191  return __generate_from_n_evaluations<sizeof...(_Sizes), _Tuple>(
4192  [&](auto __i) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
4193  using _Vi = __deduced_simd<_Tp, _SL::_S_at(__i)>;
4194  constexpr size_t __offset = _SL::_S_before(__i);
4195  constexpr size_t __base_align = alignof(simd<_Tp, _Ap>);
4196  constexpr size_t __a
4197  = __base_align - ((__offset * sizeof(_Tp)) % __base_align);
4198  constexpr size_t __b = ((__a - 1) & __a) ^ __a;
4199  constexpr size_t __alignment = __b == 0 ? __a : __b;
4200  return _Vi(__element_ptr + __offset, overaligned<__alignment>);
4201  });
4202 #else
4203  return __generate_from_n_evaluations<sizeof...(_Sizes), _Tuple>(
4204  [&](auto __i) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
4205  using _Vi = __deduced_simd<_Tp, _SL::_S_at(__i)>;
4206  const auto& __xx = __data(__x);
4207  using _Offset = decltype(_SL::_S_before(__i));
4208  return _Vi([&](auto __j) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
4209  constexpr _SizeConstant<_Offset::value + __j> __k;
4210  return __xx[__k];
4211  });
4212  });
4213 #endif
4214  }
4215 
4216 // }}}
4217 
4218 // __subscript_in_pack {{{
4219 template <size_t _I, typename _Tp, typename _Ap, typename... _As>
4220  _GLIBCXX_SIMD_INTRINSIC constexpr _Tp
4221  __subscript_in_pack(const simd<_Tp, _Ap>& __x, const simd<_Tp, _As>&... __xs)
4222  {
4223  if constexpr (_I < simd_size_v<_Tp, _Ap>)
4224  return __x[_I];
4225  else
4226  return __subscript_in_pack<_I - simd_size_v<_Tp, _Ap>>(__xs...);
4227  }
4228 
4229 // }}}
4230 // __store_pack_of_simd {{{
4231 template <typename _Tp, typename _A0, typename... _As>
4232  _GLIBCXX_SIMD_INTRINSIC void
4233  __store_pack_of_simd(char* __mem, const simd<_Tp, _A0>& __x0, const simd<_Tp, _As>&... __xs)
4234  {
4235  constexpr size_t __n_bytes = sizeof(_Tp) * simd_size_v<_Tp, _A0>;
4236  __builtin_memcpy(__mem, &__data(__x0), __n_bytes);
4237  if constexpr (sizeof...(__xs) > 0)
4238  __store_pack_of_simd(__mem + __n_bytes, __xs...);
4239  }
4240 
4241 // }}}
4242 // concat(simd...) {{{
4243 template <typename _Tp, typename... _As, typename = __detail::__odr_helper>
4244  inline _GLIBCXX_SIMD_CONSTEXPR
4245  simd<_Tp, simd_abi::deduce_t<_Tp, (simd_size_v<_Tp, _As> + ...)>>
4246  concat(const simd<_Tp, _As>&... __xs)
4247  {
4248  using _Rp = __deduced_simd<_Tp, (simd_size_v<_Tp, _As> + ...)>;
4249  if constexpr (sizeof...(__xs) == 1)
4250  return simd_cast<_Rp>(__xs...);
4251  else if ((... && __xs._M_is_constprop()))
4252  return simd<_Tp,
4253  simd_abi::deduce_t<_Tp, (simd_size_v<_Tp, _As> + ...)>>(
4254  [&](auto __i) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA
4255  { return __subscript_in_pack<__i>(__xs...); });
4256  else
4257  {
4258  _Rp __r{};
4259  __store_pack_of_simd(reinterpret_cast<char*>(&__data(__r)), __xs...);
4260  return __r;
4261  }
4262  }
4263 
4264 // }}}
4265 // concat(array<simd>) {{{
4266 template <typename _Tp, typename _Abi, size_t _Np>
4267  _GLIBCXX_SIMD_ALWAYS_INLINE
4268  _GLIBCXX_SIMD_CONSTEXPR __deduced_simd<_Tp, simd_size_v<_Tp, _Abi> * _Np>
4269  concat(const array<simd<_Tp, _Abi>, _Np>& __x)
4270  {
4271  return __call_with_subscripts<_Np>(
4272  __x, [](const auto&... __xs) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
4273  return concat(__xs...);
4274  });
4275  }
4276 
4277 // }}}
4278 
4279 /// @cond undocumented
4280 // _SmartReference {{{
// Proxy object standing in for a reference to one element. It stores a
// reference to the underlying object plus an index. Reads go through
// _M_read, writes through _Accessor::_S_set. The copy constructor is deleted
// and all mutating operators are rvalue-qualified (&&), so they can only be
// applied to a temporary proxy.
4281 template <typename _Up, typename _Accessor = _Up,
4282  typename _ValueType = typename _Up::value_type>
4283  class _SmartReference
4284  {
4285  friend _Accessor;
4286  int _M_index;
4287  _Up& _M_obj;
4288 
 // Current value of the referenced element; an arithmetic _Up is returned
 // as is, anything else is subscripted with the stored index.
4289  _GLIBCXX_SIMD_INTRINSIC constexpr _ValueType
4290  _M_read() const noexcept
4291  {
4292  if constexpr (is_arithmetic_v<_Up>)
4293  return _M_obj;
4294  else
4295  return _M_obj[_M_index];
4296  }
4297 
 // Stores __x into the referenced element via the accessor.
4298  template <typename _Tp>
4299  _GLIBCXX_SIMD_INTRINSIC constexpr void
4300  _M_write(_Tp&& __x) const
4301  { _Accessor::_S_set(_M_obj, _M_index, static_cast<_Tp&&>(__x)); }
4302 
4303  public:
4304  _GLIBCXX_SIMD_INTRINSIC constexpr
4305  _SmartReference(_Up& __o, int __i) noexcept
4306  : _M_index(__i), _M_obj(__o) {}
4307 
4308  using value_type = _ValueType;
4309 
4310  _GLIBCXX_SIMD_INTRINSIC
4311  _SmartReference(const _SmartReference&) = delete;
4312 
 // Implicit read access.
4313  _GLIBCXX_SIMD_INTRINSIC constexpr
4314  operator value_type() const noexcept
4315  { return _M_read(); }
4316 
 // Assignment; constrained by _ValuePreservingOrInt on the source type.
 // Returns a fresh proxy for the same element.
4317  template <typename _Tp, typename = _ValuePreservingOrInt<__remove_cvref_t<_Tp>, value_type>>
4318  _GLIBCXX_SIMD_INTRINSIC constexpr _SmartReference
4319  operator=(_Tp&& __x) &&
4320  {
4321  _M_write(static_cast<_Tp&&>(__x));
4322  return {_M_obj, _M_index};
4323  }
4324 
 // Generates the compound assignment operators: read the element, apply the
 // binary operator, write the result back, return a proxy for the same
 // element.
4325 #define _GLIBCXX_SIMD_OP_(__op) \
4326  template <typename _Tp, \
4327  typename _TT = decltype(declval<value_type>() __op declval<_Tp>()), \
4328  typename = _ValuePreservingOrInt<__remove_cvref_t<_Tp>, _TT>, \
4329  typename = _ValuePreservingOrInt<_TT, value_type>> \
4330  _GLIBCXX_SIMD_INTRINSIC constexpr _SmartReference \
4331  operator __op##=(_Tp&& __x) && \
4332  { \
4333  const value_type& __lhs = _M_read(); \
4334  _M_write(__lhs __op __x); \
4335  return {_M_obj, _M_index}; \
4336  }
4337  _GLIBCXX_SIMD_ALL_ARITHMETICS(_GLIBCXX_SIMD_OP_);
4338  _GLIBCXX_SIMD_ALL_SHIFTS(_GLIBCXX_SIMD_OP_);
4339  _GLIBCXX_SIMD_ALL_BINARY(_GLIBCXX_SIMD_OP_);
4340 #undef _GLIBCXX_SIMD_OP_
4341 
 // Increment/decrement operators. The defaulted, otherwise unused template
 // parameter makes the decltype check dependent, so each operator is only
 // available if value_type supports the corresponding operation.
4342  template <typename _Tp = void,
4343  typename = decltype(++declval<conditional_t<true, value_type, _Tp>&>())>
4344  _GLIBCXX_SIMD_INTRINSIC constexpr _SmartReference
4345  operator++() &&
4346  {
4347  value_type __x = _M_read();
4348  _M_write(++__x);
4349  return {_M_obj, _M_index};
4350  }
4351 
 // Post-increment returns the previous value (not a proxy).
4352  template <typename _Tp = void,
4353  typename = decltype(declval<conditional_t<true, value_type, _Tp>&>()++)>
4354  _GLIBCXX_SIMD_INTRINSIC constexpr value_type
4355  operator++(int) &&
4356  {
4357  const value_type __r = _M_read();
4358  value_type __x = __r;
4359  _M_write(++__x);
4360  return __r;
4361  }
4362 
4363  template <typename _Tp = void,
4364  typename = decltype(--declval<conditional_t<true, value_type, _Tp>&>())>
4365  _GLIBCXX_SIMD_INTRINSIC constexpr _SmartReference
4366  operator--() &&
4367  {
4368  value_type __x = _M_read();
4369  _M_write(--__x);
4370  return {_M_obj, _M_index};
4371  }
4372 
 // Post-decrement returns the previous value (not a proxy).
4373  template <typename _Tp = void,
4374  typename = decltype(declval<conditional_t<true, value_type, _Tp>&>()--)>
4375  _GLIBCXX_SIMD_INTRINSIC constexpr value_type
4376  operator--(int) &&
4377  {
4378  const value_type __r = _M_read();
4379  value_type __x = __r;
4380  _M_write(--__x);
4381  return __r;
4382  }
4383 
 // swap overloads: proxy <-> proxy, value <-> proxy and proxy <-> value.
 // Each exchanges the referenced element(s) through a temporary value_type.
4384  _GLIBCXX_SIMD_INTRINSIC friend void
4385  swap(_SmartReference&& __a, _SmartReference&& __b) noexcept(
4386  conjunction<
4387  is_nothrow_constructible<value_type, _SmartReference&&>,
4388  is_nothrow_assignable<_SmartReference&&, value_type&&>>::value)
4389  {
4390  value_type __tmp = static_cast<_SmartReference&&>(__a);
4391  static_cast<_SmartReference&&>(__a) = static_cast<value_type>(__b);
4392  static_cast<_SmartReference&&>(__b) = std::move(__tmp);
4393  }
4394 
4395  _GLIBCXX_SIMD_INTRINSIC friend void
4396  swap(value_type& __a, _SmartReference&& __b) noexcept(
4397  conjunction<
4398  is_nothrow_constructible<value_type, value_type&&>,
4399  is_nothrow_assignable<value_type&, value_type&&>,
4400  is_nothrow_assignable<_SmartReference&&, value_type&&>>::value)
4401  {
4402  value_type __tmp(std::move(__a));
4403  __a = static_cast<value_type>(__b);
4404  static_cast<_SmartReference&&>(__b) = std::move(__tmp);
4405  }
4406 
4407  _GLIBCXX_SIMD_INTRINSIC friend void
4408  swap(_SmartReference&& __a, value_type& __b) noexcept(
4409  conjunction<
4410  is_nothrow_constructible<value_type, _SmartReference&&>,
4411  is_nothrow_assignable<value_type&, value_type&&>,
4412  is_nothrow_assignable<_SmartReference&&, value_type&&>>::value)
4413  {
4414  value_type __tmp(__a);
4415  static_cast<_SmartReference&&>(__a) = std::move(__b);
4416  __b = std::move(__tmp);
4417  }
4418  };
4419 
4420 // }}}
4421 // __scalar_abi_wrapper {{{
4422 template <int _Bytes>
4423  struct __scalar_abi_wrapper
4424  {
4425  template <typename _Tp> static constexpr size_t _S_full_size = 1;
4426  template <typename _Tp> static constexpr size_t _S_size = 1;
4427  template <typename _Tp> static constexpr size_t _S_is_partial = false;
4428 
4429  template <typename _Tp, typename _Abi = simd_abi::scalar>
4430  static constexpr bool _S_is_valid_v
4431  = _Abi::template _IsValid<_Tp>::value && sizeof(_Tp) == _Bytes;
4432  };
4433 
4434 // }}}
4435 // __decay_abi metafunction {{{
4436 template <typename _Tp>
4437  struct __decay_abi { using type = _Tp; };
4438 
4439 template <int _Bytes>
4440  struct __decay_abi<__scalar_abi_wrapper<_Bytes>>
4441  { using type = simd_abi::scalar; };
4442 
4443 // }}}
4444 // __find_next_valid_abi metafunction {{{1
4445 // Given an ABI tag A<N>, find an N2 < N such that A<N2>::_S_is_valid_v<_Tp> ==
4446 // true, N2 is a power-of-2, and A<N2>::_S_is_partial<_Tp> is false. Break
4447 // recursion at 2 elements in the resulting ABI tag. In this case
4448 // type::_S_is_valid_v<_Tp> may be false.
// Walks down through halved (power-of-2) byte counts of the ABI template
// _Abi until a non-partial tag that is valid for _Tp is found. Stops without
// a validity guarantee once the next candidate would hold fewer than two
// elements of _Tp.
template <template <int> class _Abi, int _Bytes, typename _Tp>
  struct __find_next_valid_abi
  {
    static constexpr auto
    _S_choose()
    {
      constexpr int _NextBytes = std::__bit_ceil(_Bytes) / 2;
      using _NextAbi = _Abi<_NextBytes>;
      if constexpr (_NextBytes >= sizeof(_Tp) * 2)
        {
          if constexpr (_NextAbi::template _S_is_partial<_Tp> == false
                          && _NextAbi::template _S_is_valid_v<_Tp>)
            return _NextAbi();
          else
            // keep halving
            return __find_next_valid_abi<_Abi, _NextBytes, _Tp>::_S_choose();
        }
      else
        // break recursion: hand back the tag we started from
        return _Abi<_Bytes>();
    }

    using type = decltype(_S_choose());
  };
4468 
4469 template <int _Bytes, typename _Tp>
4470  struct __find_next_valid_abi<__scalar_abi_wrapper, _Bytes, _Tp>
4471  { using type = simd_abi::scalar; };
4472 
4473 // _AbiList {{{1
// Primary template, used for the empty list: no ABI is valid and both
// lookups yield void.
template <template <int> class...>
  struct _AbiList
  {
    template <typename, int>
      static constexpr bool _S_has_valid_abi = false;

    template <typename, int>
      using _FirstValidAbi = void;

    template <typename, int>
      using _BestAbi = void;
  };
4481 
4482 template <template <int> class _A0, template <int> class... _Rest>
4483  struct _AbiList<_A0, _Rest...>
4484  {
4485  template <typename _Tp, int _Np>
4486  static constexpr bool _S_has_valid_abi
4487  = _A0<sizeof(_Tp) * _Np>::template _S_is_valid_v<
4488  _Tp> || _AbiList<_Rest...>::template _S_has_valid_abi<_Tp, _Np>;
4489 
4490  template <typename _Tp, int _Np>
4491  using _FirstValidAbi = conditional_t<
4492  _A0<sizeof(_Tp) * _Np>::template _S_is_valid_v<_Tp>,
4493  typename __decay_abi<_A0<sizeof(_Tp) * _Np>>::type,
4494  typename _AbiList<_Rest...>::template _FirstValidAbi<_Tp, _Np>>;
4495 
4496  template <typename _Tp, int _Np>
4497  static constexpr auto
4498  _S_determine_best_abi()
4499  {
4500  static_assert(_Np >= 1);
4501  constexpr int _Bytes = sizeof(_Tp) * _Np;
4502  if constexpr (_Np == 1)
4503  return __make_dependent_t<_Tp, simd_abi::scalar>{};
4504  else
4505  {
4506  constexpr int __fullsize = _A0<_Bytes>::template _S_full_size<_Tp>;
4507  // _A0<_Bytes> is good if:
4508  // 1. The ABI tag is valid for _Tp
4509  // 2. The storage overhead is no more than padding to fill the next
4510  // power-of-2 number of bytes
4511  if constexpr (_A0<_Bytes>::template _S_is_valid_v<
4512  _Tp> && __fullsize / 2 < _Np)
4513  return typename __decay_abi<_A0<_Bytes>>::type{};
4514  else
4515  {
4516  using _Bp =
4517  typename __find_next_valid_abi<_A0, _Bytes, _Tp>::type;
4518  if constexpr (_Bp::template _S_is_valid_v<
4519  _Tp> && _Bp::template _S_size<_Tp> <= _Np)
4520  return _Bp{};
4521  else
4522  return
4523  typename _AbiList<_Rest...>::template _BestAbi<_Tp, _Np>{};
4524  }
4525  }
4526  }
4527 
4528  template <typename _Tp, int _Np>
4529  using _BestAbi = decltype(_S_determine_best_abi<_Tp, _Np>());
4530  };
4531 
4532 // }}}1
4533 
4534 // the following lists all native ABIs, which makes them accessible to
4535 // simd_abi::deduce and select_best_vector_type_t (for fixed_size). Order
4536 // matters: Whatever comes first has higher priority.
// ABI tag templates in priority order: _VecBltnBtmsk, then _VecBuiltin, then
// the scalar wrapper.
4537 using _AllNativeAbis = _AbiList<simd_abi::_VecBltnBtmsk, simd_abi::_VecBuiltin,
4538  __scalar_abi_wrapper>;
4539 
4540 // valid _SimdTraits specialization {{{1
// Partial specialization selected when _Abi::_IsValid<_Tp> names a type
// (void_t detection); it takes all of its members from _Abi::__traits<_Tp>.
4541 template <typename _Tp, typename _Abi>
4542  struct _SimdTraits<_Tp, _Abi, void_t<typename _Abi::template _IsValid<_Tp>>>
4543  : _Abi::template __traits<_Tp> {};
4544 
4545 // __deduce_impl specializations {{{1
4546 // try all native ABIs (including scalar) first
4547 template <typename _Tp, size_t _Np>
4548  struct __deduce_impl<
4549  _Tp, _Np, enable_if_t<_AllNativeAbis::template _S_has_valid_abi<_Tp, _Np>>>
4550  { using type = _AllNativeAbis::_FirstValidAbi<_Tp, _Np>; };
4551 
4552 // fall back to fixed_size only if scalar and native ABIs don't match
4553 template <typename _Tp, size_t _Np, typename = void>
4554  struct __deduce_fixed_size_fallback {};
4555 
4556 template <typename _Tp, size_t _Np>
4557  struct __deduce_fixed_size_fallback<_Tp, _Np,
4558  enable_if_t<simd_abi::fixed_size<_Np>::template _S_is_valid_v<_Tp>>>
4559  { using type = simd_abi::fixed_size<_Np>; };
4560 
4561 template <typename _Tp, size_t _Np, typename>
4562  struct __deduce_impl : public __deduce_fixed_size_fallback<_Tp, _Np> {};
4563 
4564 //}}}1
4565 /// @endcond
4566 
4567 // simd_mask {{{
4568 template <typename _Tp, typename _Abi>
4569  class simd_mask : public _SimdTraits<_Tp, _Abi>::_MaskBase
4570  {
4571  // types, tags, and friends {{{
4572  using _Traits = _SimdTraits<_Tp, _Abi>;
4573  using _MemberType = typename _Traits::_MaskMember;
4574 
4575  // We map all masks with equal element sizeof to a single integer type, the
4576  // one given by __int_for_sizeof_t<_Tp>. This is the approach
4577  // [[gnu::vector_size(N)]] types take as well and it reduces the number of
4578  // template specializations in the implementation classes.
4579  using _Ip = __int_for_sizeof_t<_Tp>;
4580  static constexpr _Ip* _S_type_tag = nullptr;
4581 
4582  friend typename _Traits::_MaskBase;
4583  friend class simd<_Tp, _Abi>; // to construct masks on return
4584  friend typename _Traits::_SimdImpl; // to construct masks on return and
4585  // inspect data on masked operations
4586  public:
4587  using _Impl = typename _Traits::_MaskImpl;
4588  friend _Impl;
4589 
4590  // }}}
4591  // member types {{{
4592  using value_type = bool;
4593  using reference = _SmartReference<_MemberType, _Impl, value_type>;
4594  using simd_type = simd<_Tp, _Abi>;
4595  using abi_type = _Abi;
4596 
4597  // }}}
4598  static constexpr size_t size() // {{{
4599  { return __size_or_zero_v<_Tp, _Abi>; }
4600 
4601  // }}}
4602  // constructors & assignment {{{
4603  simd_mask() = default;
4604  simd_mask(const simd_mask&) = default;
4605  simd_mask(simd_mask&&) = default;
4606  simd_mask& operator=(const simd_mask&) = default;
4607  simd_mask& operator=(simd_mask&&) = default;
4608 
4609  // }}}
4610  // access to internal representation (optional feature) {{{
4611  _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR explicit
4612  simd_mask(typename _Traits::_MaskCastType __init)
4613  : _M_data{__init} {}
4614  // conversions to internal type is done in _MaskBase
4615 
4616  // }}}
4617  // bitset interface (extension to be proposed) {{{
4618  // TS_FEEDBACK:
4619  // Conversion of simd_mask to and from bitset makes it much easier to
4620  // interface with other facilities. I suggest adding `static
4621  // simd_mask::from_bitset` and `simd_mask::to_bitset`.
4622  _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR static simd_mask
4623  __from_bitset(bitset<size()> bs)
4624  { return {__bitset_init, bs}; }
4625 
4626  _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR bitset<size()>
4627  __to_bitset() const
4628  { return _Impl::_S_to_bits(_M_data)._M_to_bitset(); }
4629 
4630  // }}}
4631  // explicit broadcast constructor {{{
4632  _GLIBCXX_SIMD_ALWAYS_INLINE explicit _GLIBCXX_SIMD_CONSTEXPR
4633  simd_mask(value_type __x)
4634  : _M_data(_Impl::template _S_broadcast<_Ip>(__x)) {}
4635 
4636  // }}}
4637  // implicit type conversion constructor {{{
4638  #ifdef _GLIBCXX_SIMD_ENABLE_IMPLICIT_MASK_CAST
4639  // proposed improvement
4640  template <typename _Up, typename _A2,
4641  typename = enable_if_t<simd_size_v<_Up, _A2> == size()>>
4642  _GLIBCXX_SIMD_ALWAYS_INLINE explicit(sizeof(_MemberType)
4643  != sizeof(typename _SimdTraits<_Up, _A2>::_MaskMember))
4644  simd_mask(const simd_mask<_Up, _A2>& __x)
4645  : simd_mask(__proposed::static_simd_cast<simd_mask>(__x)) {}
4646  #else
4647  // conforming to ISO/IEC 19570:2018
4648  template <typename _Up, typename = enable_if_t<conjunction<
4649  is_same<abi_type, simd_abi::fixed_size<size()>>,
4650  is_same<_Up, _Up>>::value>>
4651  _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR
4652  simd_mask(const simd_mask<_Up, simd_abi::fixed_size<size()>>& __x)
4653  : _M_data(_Impl::_S_from_bitmask(__data(__x), _S_type_tag)) {}
4654  #endif
4655 
4656  // }}}
4657  // load constructor {{{
// Load constructor: reads the mask from the array at __mem. The flag type
// _Flags pre-processes the pointer through _Flags::_S_apply<simd_mask>
// before it is handed to _Impl::_S_load<_Ip>.
4658  template <typename _Flags>
4659  _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR
4660  simd_mask(const value_type* __mem, _IsSimdFlagType<_Flags>)
4661  : _M_data(_Impl::template _S_load<_Ip>(_Flags::template _S_apply<simd_mask>(__mem))) {}
4662 
// Masked load constructor: value-initializes _M_data, then replaces it with
// the result of _Impl::_S_masked_load, which receives the current data, the
// selection mask __k and the (flag-adjusted) source pointer.
4663  template <typename _Flags>
4664  _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR
4665  simd_mask(const value_type* __mem, simd_mask __k, _IsSimdFlagType<_Flags>)
4666  : _M_data{}
4667  {
4668  _M_data = _Impl::_S_masked_load(_M_data, __k._M_data,
4669  _Flags::template _S_apply<simd_mask>(__mem));
4670  }
4671 
4672  // }}}
4673  // loads [simd_mask.load] {{{
4674  template <typename _Flags>
4675  _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR void
4676  copy_from(const value_type* __mem, _IsSimdFlagType<_Flags>)
4677  { _M_data = _Impl::template _S_load<_Ip>(_Flags::template _S_apply<simd_mask>(__mem)); }
4678 
4679  // }}}
4680  // stores [simd_mask.store] {{{
4681  template <typename _Flags>
4682  _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR void
4683  copy_to(value_type* __mem, _IsSimdFlagType<_Flags>) const
4684  { _Impl::_S_store(_M_data, _Flags::template _S_apply<simd_mask>(__mem)); }
4685 
4686  // }}}
4687  // scalar access {{{
4688  _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR reference
4689  operator[](size_t __i)
4690  {
4691  if (__i >= size())
4692  __invoke_ub("Subscript %d is out of range [0, %d]", __i, size() - 1);
4693  return {_M_data, int(__i)};
4694  }
4695 
4696  _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR value_type
4697  operator[](size_t __i) const
4698  {
4699  if (__i >= size())
4700  __invoke_ub("Subscript %d is out of range [0, %d]", __i, size() - 1);
4701  if constexpr (__is_scalar_abi<_Abi>())
4702  return _M_data;
4703  else
4704  return static_cast<bool>(_M_data[__i]);
4705  }
4706 
4707  // }}}
4708  // negation {{{
4709  _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR simd_mask
4710  operator!() const
4711  { return {__private_init, _Impl::_S_bit_not(_M_data)}; }
4712 
4713  // }}}
4714  // simd_mask binary operators [simd_mask.binary] {{{
#ifdef _GLIBCXX_SIMD_ENABLE_IMPLICIT_MASK_CAST
// simd_mask<int> && simd_mask<uint> needs disambiguation.
// Both overloads accept a right-hand mask of a different type that is
// convertible to simd_mask, convert it first and then combine the data.
template <typename _Up, typename _A2,
          typename = enable_if_t<is_convertible_v<simd_mask<_Up, _A2>, simd_mask>>>
  _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend simd_mask
  operator&&(const simd_mask& __x, const simd_mask<_Up, _A2>& __y)
  {
    const simd_mask __rhs(__y);
    return {__private_init, _Impl::_S_logical_and(__x._M_data, __rhs._M_data)};
  }

template <typename _Up, typename _A2,
          typename = enable_if_t<is_convertible_v<simd_mask<_Up, _A2>, simd_mask>>>
  _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend simd_mask
  operator||(const simd_mask& __x, const simd_mask<_Up, _A2>& __y)
  {
    const simd_mask __rhs(__y);
    return {__private_init, _Impl::_S_logical_or(__x._M_data, __rhs._M_data)};
  }
#endif // _GLIBCXX_SIMD_ENABLE_IMPLICIT_MASK_CAST
4735 
4736  _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend simd_mask
4737  operator&&(const simd_mask& __x, const simd_mask& __y)
4738  { return {__private_init, _Impl::_S_logical_and(__x._M_data, __y._M_data)}; }
4739 
4740  _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend simd_mask
4741  operator||(const simd_mask& __x, const simd_mask& __y)
4742  { return {__private_init, _Impl::_S_logical_or(__x._M_data, __y._M_data)}; }
4743 
4744  _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend simd_mask
4745  operator&(const simd_mask& __x, const simd_mask& __y)
4746  { return {__private_init, _Impl::_S_bit_and(__x._M_data, __y._M_data)}; }
4747 
4748  _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend simd_mask
4749  operator|(const simd_mask& __x, const simd_mask& __y)
4750  { return {__private_init, _Impl::_S_bit_or(__x._M_data, __y._M_data)}; }
4751 
4752  _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend simd_mask
4753  operator^(const simd_mask& __x, const simd_mask& __y)
4754  { return {__private_init, _Impl::_S_bit_xor(__x._M_data, __y._M_data)}; }
4755 
4756  _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend simd_mask&
4757  operator&=(simd_mask& __x, const simd_mask& __y)
4758  {
4759  __x._M_data = _Impl::_S_bit_and(__x._M_data, __y._M_data);
4760  return __x;
4761  }
4762 
4763  _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend simd_mask&
4764  operator|=(simd_mask& __x, const simd_mask& __y)
4765  {
4766  __x._M_data = _Impl::_S_bit_or(__x._M_data, __y._M_data);
4767  return __x;
4768  }
4769 
4770  _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend simd_mask&
4771  operator^=(simd_mask& __x, const simd_mask& __y)
4772  {
4773  __x._M_data = _Impl::_S_bit_xor(__x._M_data, __y._M_data);
4774  return __x;
4775  }
4776 
4777  // }}}
4778  // simd_mask compares [simd_mask.comparison] {{{
4779  _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend simd_mask
4780  operator==(const simd_mask& __x, const simd_mask& __y)
4781  { return !operator!=(__x, __y); }
4782 
4783  _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend simd_mask
4784  operator!=(const simd_mask& __x, const simd_mask& __y)
4785  { return {__private_init, _Impl::_S_bit_xor(__x._M_data, __y._M_data)}; }
4786 
4787  // }}}
4788  // private_init ctor {{{
// Internal constructor: adopts an already computed mask member value.
// The _PrivateInit tag argument distinguishes it from the public
// constructors; __init is copied into _M_data unchanged.
4789  _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR
4790  simd_mask(_PrivateInit, typename _Traits::_MaskMember __init)
4791  : _M_data(__init) {}
4792 
4793  // }}}
4794  // private_init generator ctor {{{
4795  template <typename _Fp, typename = decltype(bool(declval<_Fp>()(size_t())))>
4796  _GLIBCXX_SIMD_INTRINSIC constexpr
4797  simd_mask(_PrivateInit, _Fp&& __gen)
4798  : _M_data()
4799  {
4800  __execute_n_times<size()>([&](auto __i) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
4801  _Impl::_S_set(_M_data, __i, __gen(__i));
4802  });
4803  }
4804 
4805  // }}}
4806  // bitset_init ctor {{{
// Internal constructor from a bitset: __init is first wrapped in a
// _SanitizedBitMask<size()> and then converted to the mask member type by
// _Impl::_S_from_bitmask (selected through _S_type_tag).
4807  _GLIBCXX_SIMD_INTRINSIC constexpr
4808  simd_mask(_BitsetInit, bitset<size()> __init)
4809  : _M_data(_Impl::_S_from_bitmask(_SanitizedBitMask<size()>(__init), _S_type_tag))
4810  {}
4811 
4812  // }}}
4813  // __cvt {{{
4814  // TS_FEEDBACK:
4815  // The conversion operator this implements should be a ctor on simd_mask.
4816  // Once you call .__cvt() on a simd_mask it converts conveniently.
4817  // A useful variation: add `explicit(sizeof(_Tp) != sizeof(_Up))`
4818  struct _CvtProxy
4819  {
4820  template <typename _Up, typename _A2,
4821  typename = enable_if_t<simd_size_v<_Up, _A2> == simd_size_v<_Tp, _Abi>>>
4822  _GLIBCXX_SIMD_ALWAYS_INLINE
4823  operator simd_mask<_Up, _A2>() &&
4824  {
4825  using namespace std::experimental::__proposed;
4826  return static_simd_cast<simd_mask<_Up, _A2>>(_M_data);
4827  }
4828 
4829  const simd_mask<_Tp, _Abi>& _M_data;
4830  };
4831 
4832  _GLIBCXX_SIMD_INTRINSIC _CvtProxy
4833  __cvt() const
4834  { return {*this}; }
4835 
4836  // }}}
4837  // operator?: overloads (suggested extension) {{{
// Overloads of the conditional operator; compiled only when the compiler
// defines __GXX_CONDITIONAL_IS_OVERLOADABLE__.
4838  #ifdef __GXX_CONDITIONAL_IS_OVERLOADABLE__
// mask ? mask : mask -- starts from a copy of __where_false and lets
// _Impl::_S_masked_assign write __where_true's data under control of __k.
4839  _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend simd_mask
4840  operator?:(const simd_mask& __k, const simd_mask& __where_true,
4841  const simd_mask& __where_false)
4842  {
4843  auto __ret = __where_false;
4844  _Impl::_S_masked_assign(__k._M_data, __ret._M_data, __where_true._M_data);
4845  return __ret;
4846  }
4847 
// mask ? _U1 : _U2 -- the result type _Rp is simd<common_type_t<_U1, _U2>,
// _Abi>. Enabled only if both operands convert to _Rp and simd_mask converts
// to _Rp::mask_type. The result starts as __where_false; __where_true is
// converted to _Rp and assigned under the converted mask.
4848  template <typename _U1, typename _U2,
4849  typename _Rp = simd<common_type_t<_U1, _U2>, _Abi>,
4850  typename = enable_if_t<conjunction_v<
4851  is_convertible<_U1, _Rp>, is_convertible<_U2, _Rp>,
4852  is_convertible<simd_mask, typename _Rp::mask_type>>>>
4853  _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend _Rp
4854  operator?:(const simd_mask& __k, const _U1& __where_true,
4855  const _U2& __where_false)
4856  {
4857  _Rp __ret = __where_false;
4858  _Rp::_Impl::_S_masked_assign(
4859  __data(static_cast<typename _Rp::mask_type>(__k)), __data(__ret),
4860  __data(static_cast<_Rp>(__where_true)));
4861  return __ret;
4862  }
4863 
4864  #ifdef _GLIBCXX_SIMD_ENABLE_IMPLICIT_MASK_CAST
// Mixed mask types: the condition and the false operand may be other
// simd_mask types that are convertible to simd_mask; both are converted
// before the masked assignment.
4865  template <typename _Kp, typename _Ak, typename _Up, typename _Au,
4866  typename = enable_if_t<
4867  conjunction_v<is_convertible<simd_mask<_Kp, _Ak>, simd_mask>,
4868  is_convertible<simd_mask<_Up, _Au>, simd_mask>>>>
4869  _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend simd_mask
4870  operator?:(const simd_mask<_Kp, _Ak>& __k, const simd_mask& __where_true,
4871  const simd_mask<_Up, _Au>& __where_false)
4872  {
4873  simd_mask __ret = __where_false;
4874  _Impl::_S_masked_assign(simd_mask(__k)._M_data, __ret._M_data,
4875  __where_true._M_data);
4876  return __ret;
4877  }
4878  #endif // _GLIBCXX_SIMD_ENABLE_IMPLICIT_MASK_CAST
4879  #endif // __GXX_CONDITIONAL_IS_OVERLOADABLE__
4880 
4881  // }}}
4882  // _M_is_constprop {{{
4883  _GLIBCXX_SIMD_INTRINSIC constexpr bool
4884  _M_is_constprop() const
4885  {
4886  if constexpr (__is_scalar_abi<_Abi>())
4887  return __builtin_constant_p(_M_data);
4888  else
4889  return _M_data._M_is_constprop();
4890  }
4891 
4892  // }}}
4893 
4894  private:
4895  friend const auto& __data<_Tp, abi_type>(const simd_mask&);
4896  friend auto& __data<_Tp, abi_type>(simd_mask&);
4897  alignas(_Traits::_S_mask_align) _MemberType _M_data;
4898  };
4899 
4900 // }}}
4901 
4902 /// @cond undocumented
4903 // __data(simd_mask) {{{
// Accessor for the internal representation of a simd_mask: returns a const
// reference to the _M_data member of __x.
4904 template <typename _Tp, typename _Ap>
4905  _GLIBCXX_SIMD_INTRINSIC constexpr const auto&
4906  __data(const simd_mask<_Tp, _Ap>& __x)
4907  { return __x._M_data; }
4908 
// Mutable counterpart: returns a non-const reference to __x._M_data.
4909 template <typename _Tp, typename _Ap>
4910  _GLIBCXX_SIMD_INTRINSIC constexpr auto&
4911  __data(simd_mask<_Tp, _Ap>& __x)
4912  { return __x._M_data; }
4913 
4914 // }}}
4915 /// @endcond
4916 
4917 // simd_mask reductions [simd_mask.reductions] {{{
4918 template <typename _Tp, typename _Abi>
4919  _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR bool
4920  all_of(const simd_mask<_Tp, _Abi>& __k) noexcept
4921  {
4922  if (__builtin_is_constant_evaluated() || __k._M_is_constprop())
4923  {
4924  for (size_t __i = 0; __i < simd_size_v<_Tp, _Abi>; ++__i)
4925  if (!__k[__i])
4926  return false;
4927  return true;
4928  }
4929  else
4930  return _Abi::_MaskImpl::_S_all_of(__k);
4931  }
4932 
4933 template <typename _Tp, typename _Abi>
4934  _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR bool
4935  any_of(const simd_mask<_Tp, _Abi>& __k) noexcept
4936  {
4937  if (__builtin_is_constant_evaluated() || __k._M_is_constprop())
4938  {
4939  for (size_t __i = 0; __i < simd_size_v<_Tp, _Abi>; ++__i)
4940  if (__k[__i])
4941  return true;
4942  return false;
4943  }
4944  else
4945  return _Abi::_MaskImpl::_S_any_of(__k);
4946  }
4947 
4948 template <typename _Tp, typename _Abi>
4949  _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR bool
4950  none_of(const simd_mask<_Tp, _Abi>& __k) noexcept
4951  {
4952  if (__builtin_is_constant_evaluated() || __k._M_is_constprop())
4953  {
4954  for (size_t __i = 0; __i < simd_size_v<_Tp, _Abi>; ++__i)
4955  if (__k[__i])
4956  return false;
4957  return true;
4958  }
4959  else
4960  return _Abi::_MaskImpl::_S_none_of(__k);
4961  }
4962 
4963 template <typename _Tp, typename _Abi>
4964  _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR bool
4965  some_of(const simd_mask<_Tp, _Abi>& __k) noexcept
4966  {
4967  if (__builtin_is_constant_evaluated() || __k._M_is_constprop())
4968  {
4969  for (size_t __i = 1; __i < simd_size_v<_Tp, _Abi>; ++__i)
4970  if (__k[__i] != __k[__i - 1])
4971  return true;
4972  return false;
4973  }
4974  else
4975  return _Abi::_MaskImpl::_S_some_of(__k);
4976  }
4977 
4978 template <typename _Tp, typename _Abi>
4979  _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR int
4980  popcount(const simd_mask<_Tp, _Abi>& __k) noexcept
4981  {
4982  if (__builtin_is_constant_evaluated() || __k._M_is_constprop())
4983  {
4984  const int __r = __call_with_subscripts<simd_size_v<_Tp, _Abi>>(
4985  __k, [](auto... __elements) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
4986  return ((__elements != 0) + ...);
4987  });
4988  if (__builtin_is_constant_evaluated() || __builtin_constant_p(__r))
4989  return __r;
4990  }
4991  return _Abi::_MaskImpl::_S_popcount(__k);
4992  }
4993 
4994 template <typename _Tp, typename _Abi>
4995  _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR int
4996  find_first_set(const simd_mask<_Tp, _Abi>& __k)
4997  {
4998  if (__builtin_is_constant_evaluated() || __k._M_is_constprop())
4999  {
5000  constexpr size_t _Np = simd_size_v<_Tp, _Abi>;
5001  const size_t _Idx = __call_with_n_evaluations<_Np>(
5002  [](auto... __indexes) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
5003  return std::min({__indexes...});
5004  }, [&](auto __i) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
5005  return __k[__i] ? +__i : _Np;
5006  });
5007  if (_Idx >= _Np)
5008  __invoke_ub("find_first_set(empty mask) is UB");
5009  if (__builtin_constant_p(_Idx))
5010  return _Idx;
5011  }
5012  return _Abi::_MaskImpl::_S_find_first_set(__k);
5013  }
5014 
5015 template <typename _Tp, typename _Abi>
5016  _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR int
5017  find_last_set(const simd_mask<_Tp, _Abi>& __k)
5018  {
5019  if (__builtin_is_constant_evaluated() || __k._M_is_constprop())
5020  {
5021  constexpr size_t _Np = simd_size_v<_Tp, _Abi>;
5022  const int _Idx = __call_with_n_evaluations<_Np>(
5023  [](auto... __indexes) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
5024  return std::max({__indexes...});
5025  }, [&](auto __i) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
5026  return __k[__i] ? int(__i) : -1;
5027  });
5028  if (_Idx < 0)
5029  __invoke_ub("find_first_set(empty mask) is UB");
5030  if (__builtin_constant_p(_Idx))
5031  return _Idx;
5032  }
5033  return _Abi::_MaskImpl::_S_find_last_set(__k);
5034  }
5035 
// Reduction overloads for a single boolean passed as _ExactBool.
// NOTE(review): _ExactBool is defined outside this chunk; presumably it
// accepts only arguments of type bool exactly -- confirm.

// A single value is "all true" exactly when it is true.
5036 _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR bool
5037 all_of(_ExactBool __x) noexcept
5038 { return __x; }
5039 
// A single value has "any true" exactly when it is true.
5040 _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR bool
5041 any_of(_ExactBool __x) noexcept
5042 { return __x; }
5043 
// A single value has "none true" exactly when it is false.
5044 _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR bool
5045 none_of(_ExactBool __x) noexcept
5046 { return !__x; }
5047 
// Always false: the argument is ignored.
5048 _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR bool
5049 some_of(_ExactBool) noexcept
5050 { return false; }
5051 
// The count of true values is the argument converted to int.
5052 _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR int
5053 popcount(_ExactBool __x) noexcept
5054 { return __x; }
5055 
// Always returns index 0; the argument is not inspected.
5056 _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR int
5057 find_first_set(_ExactBool)
5058 { return 0; }
5059 
// Always returns index 0; the argument is not inspected.
5060 _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR int
5061 find_last_set(_ExactBool)
5062 { return 0; }
5063 
5064 // }}}
5065 
5066 /// @cond undocumented
5067 // _SimdIntOperators{{{1
5068 template <typename _V, typename _Tp, typename _Abi, bool>
5069  class _SimdIntOperators {};
5070 
5071 template <typename _V, typename _Tp, typename _Abi>
5072  class _SimdIntOperators<_V, _Tp, _Abi, true>
5073  {
5074  using _Impl = typename _SimdTraits<_Tp, _Abi>::_SimdImpl;
5075 
5076  _GLIBCXX_SIMD_INTRINSIC constexpr const _V&
5077  __derived() const
5078  { return *static_cast<const _V*>(this); }
5079 
5080  template <typename _Up>
5081  _GLIBCXX_SIMD_INTRINSIC static _GLIBCXX_SIMD_CONSTEXPR _V
5082  _S_make_derived(_Up&& __d)
5083  { return {__private_init, static_cast<_Up&&>(__d)}; }
5084 
5085  public:
5086  _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend
5087  _V&
5088  operator%=(_V& __lhs, const _V& __x)
5089  { return __lhs = __lhs % __x; }
5090 
5091  _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend
5092  _V&
5093  operator&=(_V& __lhs, const _V& __x)
5094  { return __lhs = __lhs & __x; }
5095 
5096  _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend
5097  _V&
5098  operator|=(_V& __lhs, const _V& __x)
5099  { return __lhs = __lhs | __x; }
5100 
5101  _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend
5102  _V&
5103  operator^=(_V& __lhs, const _V& __x)
5104  { return __lhs = __lhs ^ __x; }
5105 
5106  _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend
5107  _V&
5108  operator<<=(_V& __lhs, const _V& __x)
5109  { return __lhs = __lhs << __x; }
5110 
5111  _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend
5112  _V&
5113  operator>>=(_V& __lhs, const _V& __x)
5114  { return __lhs = __lhs >> __x; }
5115 
5116  _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend
5117  _V&
5118  operator<<=(_V& __lhs, int __x)
5119  { return __lhs = __lhs << __x; }
5120 
5121  _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend
5122  _V&
5123  operator>>=(_V& __lhs, int __x)
5124  { return __lhs = __lhs >> __x; }
5125 
5126  _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend
5127  _V
5128  operator%(const _V& __x, const _V& __y)
5129  {
5130  return _SimdIntOperators::_S_make_derived(
5131  _Impl::_S_modulus(__data(__x), __data(__y)));
5132  }
5133 
5134  _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend
5135  _V
5136  operator&(const _V& __x, const _V& __y)
5137  {
5138  return _SimdIntOperators::_S_make_derived(
5139  _Impl::_S_bit_and(__data(__x), __data(__y)));
5140  }
5141 
5142  _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend
5143  _V
5144  operator|(const _V& __x, const _V& __y)
5145  {
5146  return _SimdIntOperators::_S_make_derived(
5147  _Impl::_S_bit_or(__data(__x), __data(__y)));
5148  }
5149 
5150  _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend
5151  _V
5152  operator^(const _V& __x, const _V& __y)
5153  {
5154  return _SimdIntOperators::_S_make_derived(
5155  _Impl::_S_bit_xor(__data(__x), __data(__y)));
5156  }
5157 
5158  _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend
5159  _V
5160  operator<<(const _V& __x, const _V& __y)
5161  {
5162  return _SimdIntOperators::_S_make_derived(
5163  _Impl::_S_bit_shift_left(__data(__x), __data(__y)));
5164  }
5165 
5166  _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend
5167  _V
5168  operator>>(const _V& __x, const _V& __y)
5169  {
5170  return _SimdIntOperators::_S_make_derived(
5171  _Impl::_S_bit_shift_right(__data(__x), __data(__y)));
5172  }
5173 
5174  _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend
5175  _V
5176  operator<<(const _V& __x, int __y)
5177  {
5178  if (__y < 0)
5179  __invoke_ub("The behavior is undefined if the right operand of a "
5180  "shift operation is negative. [expr.shift]\nA shift by "
5181  "%d was requested",
5182  __y);
5183  if (size_t(__y) >= sizeof(declval<_Tp>() << __y) * __CHAR_BIT__)
5184  __invoke_ub(
5185  "The behavior is undefined if the right operand of a "
5186  "shift operation is greater than or equal to the width of the "
5187  "promoted left operand. [expr.shift]\nA shift by %d was requested",
5188  __y);
5189  return _SimdIntOperators::_S_make_derived(
5190  _Impl::_S_bit_shift_left(__data(__x), __y));
5191  }
5192 
5193  _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend
5194  _V
5195  operator>>(const _V& __x, int __y)
5196  {
5197  if (__y < 0)
5198  __invoke_ub(
5199  "The behavior is undefined if the right operand of a shift "
5200  "operation is negative. [expr.shift]\nA shift by %d was requested",
5201  __y);
5202  if (size_t(__y) >= sizeof(declval<_Tp>() << __y) * __CHAR_BIT__)
5203  __invoke_ub(
5204  "The behavior is undefined if the right operand of a shift "
5205  "operation is greater than or equal to the width of the promoted "
5206  "left operand. [expr.shift]\nA shift by %d was requested",
5207  __y);
5208  return _SimdIntOperators::_S_make_derived(
5209  _Impl::_S_bit_shift_right(__data(__x), __y));
5210  }
5211 
5212  // unary operators (for integral _Tp)
5213  _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR
5214  _V
5215  operator~() const
5216  { return {__private_init, _Impl::_S_complement(__derived()._M_data)}; }
5217  };
5218 
5219 //}}}1
5220 /// @endcond
5221 
5222 // simd {{{
/// The data-parallel vector type simd<_Tp, _Abi>.
/// It derives from _SimdIntOperators (whose bool argument is true only when
/// _Tp is integral and the traits report a valid combination) and from the
/// traits-provided _SimdBase. All element-wise work is delegated to the
/// static functions of _Impl, operating on the single data member _M_data.
5223 template <typename _Tp, typename _Abi>
5224  class simd : public _SimdIntOperators<
5225  simd<_Tp, _Abi>, _Tp, _Abi,
5226  conjunction<is_integral<_Tp>,
5227  typename _SimdTraits<_Tp, _Abi>::_IsValid>::value>,
5228  public _SimdTraits<_Tp, _Abi>::_SimdBase
5229  {
5230  using _Traits = _SimdTraits<_Tp, _Abi>;
5231  using _MemberType = typename _Traits::_SimdMember;
5232  using _CastType = typename _Traits::_SimdCastType;
      // Null pointer of type _Tp*, passed to _Impl functions as a type tag.
5233  static constexpr _Tp* _S_type_tag = nullptr;
5234  friend typename _Traits::_SimdBase;
5235 
5236  public:
5237  using _Impl = typename _Traits::_SimdImpl;
5238  friend _Impl;
5239  friend _SimdIntOperators<simd, _Tp, _Abi, true>;
5240 
5241  using value_type = _Tp;
5242  using reference = _SmartReference<_MemberType, _Impl, value_type>;
5243  using mask_type = simd_mask<_Tp, _Abi>;
5244  using abi_type = _Abi;
5245 
      // Number of elements, taken from __size_or_zero_v<_Tp, _Abi>.
5246  static constexpr size_t size()
5247  { return __size_or_zero_v<_Tp, _Abi>; }
5248 
5249  _GLIBCXX_SIMD_CONSTEXPR simd() = default;
5250  _GLIBCXX_SIMD_CONSTEXPR simd(const simd&) = default;
5251  _GLIBCXX_SIMD_CONSTEXPR simd(simd&&) noexcept = default;
5252  _GLIBCXX_SIMD_CONSTEXPR simd& operator=(const simd&) = default;
5253  _GLIBCXX_SIMD_CONSTEXPR simd& operator=(simd&&) noexcept = default;
5254 
5255  // implicit broadcast constructor
      // bool arguments are rejected by the enable_if; the argument is cast
      // to value_type and handed to _Impl::_S_broadcast.
5256  template <typename _Up,
5257  typename = enable_if_t<!is_same_v<__remove_cvref_t<_Up>, bool>>>
5258  _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR
5259  simd(_ValuePreservingOrInt<_Up, value_type>&& __x)
5260  : _M_data(
5261  _Impl::_S_broadcast(static_cast<value_type>(static_cast<_Up&&>(__x))))
5262  {}
5263 
5264  // implicit type conversion constructor (convert from fixed_size to
5265  // fixed_size)
      // Enabled only if abi_type is fixed_size<size()>, the conversion
      // _Up -> value_type is not narrowing and
      // __converts_to_higher_integer_rank holds. The source is converted to
      // an array<_Up, size()> and loaded with the vector_aligned flag.
5266  template <typename _Up>
5267  _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR
5268  simd(const simd<_Up, simd_abi::fixed_size<size()>>& __x,
5269  enable_if_t<
5270  conjunction<
5271  is_same<simd_abi::fixed_size<size()>, abi_type>,
5272  negation<__is_narrowing_conversion<_Up, value_type>>,
5273  __converts_to_higher_integer_rank<_Up, value_type>>::value,
5274  void*> = nullptr)
5275  : simd{static_cast<array<_Up, size()>>(__x).data(), vector_aligned} {}
5276 
5277  // explicit type conversion constructor
      // Only available with _GLIBCXX_SIMD_ENABLE_STATIC_CAST; participates
      // when static_simd_cast<simd>(simd<_Up, _A2>) is a valid expression.
5278 #ifdef _GLIBCXX_SIMD_ENABLE_STATIC_CAST
5279  template <typename _Up, typename _A2,
5280  typename = decltype(static_simd_cast<simd>(
5281  declval<const simd<_Up, _A2>&>()))>
5282  _GLIBCXX_SIMD_ALWAYS_INLINE explicit _GLIBCXX_SIMD_CONSTEXPR
5283  simd(const simd<_Up, _A2>& __x)
5284  : simd(static_simd_cast<simd>(__x)) {}
5285 #endif // _GLIBCXX_SIMD_ENABLE_STATIC_CAST
5286 
5287  // generator constructor
      // The defaulted second parameter only constrains the overload: its
      // type is formed from the result of calling __gen with a
      // _SizeConstant<0>&. The elements are produced by _Impl::_S_generator.
5288  template <typename _Fp>
5289  _GLIBCXX_SIMD_ALWAYS_INLINE explicit _GLIBCXX_SIMD_CONSTEXPR
5290  simd(_Fp&& __gen, _ValuePreservingOrInt<decltype(declval<_Fp>()(
5291  declval<_SizeConstant<0>&>())),
5292  value_type>* = nullptr)
5293  : _M_data(_Impl::_S_generator(static_cast<_Fp&&>(__gen), _S_type_tag)) {}
5294 
5295  // load constructor
      // The flag type adjusts the pointer via _S_apply<simd> before
      // _Impl::_S_load reads from it.
5296  template <typename _Up, typename _Flags>
5297  _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR
5298  simd(const _Up* __mem, _IsSimdFlagType<_Flags>)
5299  : _M_data(
5300  _Impl::_S_load(_Flags::template _S_apply<simd>(__mem), _S_type_tag))
5301  {}
5302 
5303  // loads [simd.load]
      // Replaces the current value with the elements loaded from __mem.
5304  template <typename _Up, typename _Flags>
5305  _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR void
5306  copy_from(const _Vectorizable<_Up>* __mem, _IsSimdFlagType<_Flags>)
5307  {
5308  _M_data = static_cast<decltype(_M_data)>(
5309  _Impl::_S_load(_Flags::template _S_apply<simd>(__mem), _S_type_tag));
5310  }
5311 
5312  // stores [simd.store]
      // Writes the elements to __mem via _Impl::_S_store.
5313  template <typename _Up, typename _Flags>
5314  _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR void
5315  copy_to(_Vectorizable<_Up>* __mem, _IsSimdFlagType<_Flags>) const
5316  {
5317  _Impl::_S_store(_M_data, _Flags::template _S_apply<simd>(__mem),
5318  _S_type_tag);
5319  }
5320 
5321  // scalar access
      // The mutable overload returns a `reference` object built from _M_data
      // and the index; no range check is performed here.
5322  _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR reference
5323  operator[](size_t __i)
5324  { return {_M_data, int(__i)}; }
5325 
      // The const overload returns the element by value. With the scalar ABI
      // the only valid index is 0 (checked by a debug assertion).
5326  _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR value_type
5327  operator[]([[maybe_unused]] size_t __i) const
5328  {
5329  if constexpr (__is_scalar_abi<_Abi>())
5330  {
5331  _GLIBCXX_DEBUG_ASSERT(__i == 0);
5332  return _M_data;
5333  }
5334  else
5335  return _M_data[__i];
5336  }
5337 
5338  // increment and decrement:
      // The prefix forms modify in place and return *this; the postfix
      // forms return a copy taken before the modification.
5339  _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR simd&
5340  operator++()
5341  {
5342  _Impl::_S_increment(_M_data);
5343  return *this;
5344  }
5345 
5346  _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR simd
5347  operator++(int)
5348  {
5349  simd __r = *this;
5350  _Impl::_S_increment(_M_data);
5351  return __r;
5352  }
5353 
5354  _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR simd&
5355  operator--()
5356  {
5357  _Impl::_S_decrement(_M_data);
5358  return *this;
5359  }
5360 
5361  _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR simd
5362  operator--(int)
5363  {
5364  simd __r = *this;
5365  _Impl::_S_decrement(_M_data);
5366  return __r;
5367  }
5368 
5369  // unary operators (for any _Tp)
      // operator! yields a mask_type (via _Impl::_S_negate), not a simd.
5370  _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR mask_type
5371  operator!() const
5372  { return {__private_init, _Impl::_S_negate(_M_data)}; }
5373 
5374  _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR simd
5375  operator+() const
5376  { return *this; }
5377 
5378  _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR simd
5379  operator-() const
5380  { return {__private_init, _Impl::_S_unary_minus(_M_data)}; }
5381 
5382  // access to internal representation (suggested extension)
5383  _GLIBCXX_SIMD_ALWAYS_INLINE explicit _GLIBCXX_SIMD_CONSTEXPR
5384  simd(_CastType __init) : _M_data(__init) {}
5385 
5386  // compound assignment [simd.cassign]
      // Each compound form evaluates the matching binary operator and
      // assigns the result to the left operand.
5387  _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend simd&
5388  operator+=(simd& __lhs, const simd& __x)
5389  { return __lhs = __lhs + __x; }
5390 
5391  _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend simd&
5392  operator-=(simd& __lhs, const simd& __x)
5393  { return __lhs = __lhs - __x; }
5394 
5395  _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend simd&
5396  operator*=(simd& __lhs, const simd& __x)
5397  { return __lhs = __lhs * __x; }
5398 
5399  _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend simd&
5400  operator/=(simd& __lhs, const simd& __x)
5401  { return __lhs = __lhs / __x; }
5402 
5403  // binary operators [simd.binary]
5404  _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend simd
5405  operator+(const simd& __x, const simd& __y)
5406  { return {__private_init, _Impl::_S_plus(__x._M_data, __y._M_data)}; }
5407 
5408  _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend simd
5409  operator-(const simd& __x, const simd& __y)
5410  { return {__private_init, _Impl::_S_minus(__x._M_data, __y._M_data)}; }
5411 
5412  _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend simd
5413  operator*(const simd& __x, const simd& __y)
5414  { return {__private_init, _Impl::_S_multiplies(__x._M_data, __y._M_data)}; }
5415 
5416  _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend simd
5417  operator/(const simd& __x, const simd& __y)
5418  { return {__private_init, _Impl::_S_divides(__x._M_data, __y._M_data)}; }
5419 
5420  // compares [simd.comparison]
      // All comparisons return a mask_type. operator> and operator>= reuse
      // _S_less / _S_less_equal with the operands swapped.
5421  _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend mask_type
5422  operator==(const simd& __x, const simd& __y)
5423  { return simd::_S_make_mask(_Impl::_S_equal_to(__x._M_data, __y._M_data)); }
5424 
5425  _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend mask_type
5426  operator!=(const simd& __x, const simd& __y)
5427  {
5428  return simd::_S_make_mask(
5429  _Impl::_S_not_equal_to(__x._M_data, __y._M_data));
5430  }
5431 
5432  _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend mask_type
5433  operator<(const simd& __x, const simd& __y)
5434  { return simd::_S_make_mask(_Impl::_S_less(__x._M_data, __y._M_data)); }
5435 
5436  _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend mask_type
5437  operator<=(const simd& __x, const simd& __y)
5438  {
5439  return simd::_S_make_mask(_Impl::_S_less_equal(__x._M_data, __y._M_data));
5440  }
5441 
5442  _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend mask_type
5443  operator>(const simd& __x, const simd& __y)
5444  { return simd::_S_make_mask(_Impl::_S_less(__y._M_data, __x._M_data)); }
5445 
5446  _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend mask_type
5447  operator>=(const simd& __x, const simd& __y)
5448  {
5449  return simd::_S_make_mask(_Impl::_S_less_equal(__y._M_data, __x._M_data));
5450  }
5451 
5452  // operator?: overloads (suggested extension) {{{
      // Starts from a copy of __where_false and lets _S_masked_assign write
      // __where_true's data under control of __k.
5453 #ifdef __GXX_CONDITIONAL_IS_OVERLOADABLE__
5454  _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend simd
5455  operator?:(const mask_type& __k, const simd& __where_true,
5456  const simd& __where_false)
5457  {
5458  auto __ret = __where_false;
5459  _Impl::_S_masked_assign(__data(__k), __data(__ret), __data(__where_true));
5460  return __ret;
5461  }
5462 
5463 #endif // __GXX_CONDITIONAL_IS_OVERLOADABLE__
5464  // }}}
5465 
5466  // "private" because of the first arguments's namespace
5467  _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR
5468  simd(_PrivateInit, const _MemberType& __init)
5469  : _M_data(__init) {}
5470 
5471  // "private" because of the first arguments's namespace
      // Value-initializes _M_data, then assigns ~*this to the elements
      // selected by the mask constructed from __init.
5472  _GLIBCXX_SIMD_INTRINSIC
5473  simd(_BitsetInit, bitset<size()> __init) : _M_data()
5474  { where(mask_type(__bitset_init, __init), *this) = ~*this; }
5475 
      // Reports whether the stored data is known to the compiler as a
      // constant (scalar ABI: __builtin_constant_p on _M_data; otherwise the
      // question is forwarded to the data member).
5476  _GLIBCXX_SIMD_INTRINSIC constexpr bool
5477  _M_is_constprop() const
5478  {
5479  if constexpr (__is_scalar_abi<_Abi>())
5480  return __builtin_constant_p(_M_data);
5481  else
5482  return _M_data._M_is_constprop();
5483  }
5484 
5485  private:
      // Wraps an implementation-level mask value into a mask_type.
5486  _GLIBCXX_SIMD_INTRINSIC static constexpr mask_type
5487  _S_make_mask(typename mask_type::_MemberType __k)
5488  { return {__private_init, __k}; }
5489 
5490  friend const auto& __data<value_type, abi_type>(const simd&);
5491  friend auto& __data<value_type, abi_type>(simd&);
      // The only data member; its alignment comes from the traits.
5492  alignas(_Traits::_S_simd_align) _MemberType _M_data;
5493  };
5494 
5495 // }}}
5496 /// @cond undocumented
5497 // __data {{{
// Accessor for the internal representation of a simd: returns a const
// reference to the _M_data member of __x.
5498 template <typename _Tp, typename _Ap>
5499  _GLIBCXX_SIMD_INTRINSIC constexpr const auto&
5500  __data(const simd<_Tp, _Ap>& __x)
5501  { return __x._M_data; }
5502 
// Mutable counterpart: returns a non-const reference to __x._M_data.
5503 template <typename _Tp, typename _Ap>
5504  _GLIBCXX_SIMD_INTRINSIC constexpr auto&
5505  __data(simd<_Tp, _Ap>& __x)
5506  { return __x._M_data; }
5507 
5508 // }}}
5509 namespace __float_bitwise_operators { //{{{
5510 template <typename _Tp, typename _Ap>
5511  _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR simd<_Tp, _Ap>
5512  operator^(const simd<_Tp, _Ap>& __a, const simd<_Tp, _Ap>& __b)
5513  { return {__private_init, _Ap::_SimdImpl::_S_bit_xor(__data(__a), __data(__b))}; }
5514 
5515 template <typename _Tp, typename _Ap>
5516  _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR simd<_Tp, _Ap>
5517  operator|(const simd<_Tp, _Ap>& __a, const simd<_Tp, _Ap>& __b)
5518  { return {__private_init, _Ap::_SimdImpl::_S_bit_or(__data(__a), __data(__b))}; }
5519 
5520 template <typename _Tp, typename _Ap>
5521  _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR simd<_Tp, _Ap>
5522  operator&(const simd<_Tp, _Ap>& __a, const simd<_Tp, _Ap>& __b)
5523  { return {__private_init, _Ap::_SimdImpl::_S_bit_and(__data(__a), __data(__b))}; }
5524 
5525 template <typename _Tp, typename _Ap>
5526  _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR
5527  enable_if_t<is_floating_point_v<_Tp>, simd<_Tp, _Ap>>
5528  operator~(const simd<_Tp, _Ap>& __a)
5529  { return {__private_init, _Ap::_SimdImpl::_S_complement(__data(__a))}; }
5530 } // namespace __float_bitwise_operators }}}
5531 /// @endcond
5532 
5533 /// @}
5534 _GLIBCXX_SIMD_END_NAMESPACE
5535 
5536 #endif // __cplusplus >= 201703L
5537 #endif // _GLIBCXX_EXPERIMENTAL_SIMD_H
5538 
5539 // vim: foldmethod=marker foldmarker={{{,}}}
std::basic_ostream< _CharT, _Traits > & operator<<(std::basic_ostream< _CharT, _Traits > &__os, const bitset< _Nb > &__x)
Global I/O operators for bitsets.
Definition: bitset:1683
Definition: simd.h:281
std::basic_istream< _CharT, _Traits > & operator>>(std::basic_istream< _CharT, _Traits > &__is, bitset< _Nb > &__x)
Global I/O operators for bitsets.
Definition: bitset:1593
constexpr auto tuple_cat(_Tpls &&... __tpls) -> typename __tuple_cat_result< _Tpls... >::__type
Create a tuple containing all elements from multiple tuple-like objects.
Definition: tuple:2141
typename conditional< _Cond, _Iftrue, _Iffalse >::type conditional_t
Alias template for conditional.
Definition: type_traits:2614
constexpr const _Tp & min(const _Tp &, const _Tp &)
This does what you think it does.
Definition: stl_algobase.h:233
constexpr duration< __common_rep_t< _Rep1, __disable_if_is_duration< _Rep2 > >, _Period > operator%(const duration< _Rep1, _Period > &__d, const _Rep2 &__s)
Definition: chrono.h:779
constexpr const _Tp & max(const _Tp &, const _Tp &)
This does what you think it does.
Definition: stl_algobase.h:257
constexpr complex< _Tp > operator*(const complex< _Tp > &__x, const complex< _Tp > &__y)
Return new complex value x times y.
Definition: complex:395
void void_t
A metafunction that always yields void, used for detecting valid types.
Definition: type_traits:2632
Definition: simd.h:86
typename make_unsigned< _Tp >::type make_unsigned_t
Alias template for make_unsigned.
Definition: type_traits:1983
integral_constant< bool, true > true_type
The type used as a compile-time boolean with true value.
Definition: type_traits:82
constexpr complex< _Tp > operator/(const complex< _Tp > &__x, const complex< _Tp > &__y)
Return new complex value x divided by y.
Definition: complex:425
typename remove_pointer< _Tp >::type remove_pointer_t
Alias template for remove_pointer.
Definition: type_traits:2065
typename enable_if< _Cond, _Tp >::type enable_if_t
Alias template for enable_if.
Definition: type_traits:2610
typename remove_const< _Tp >::type remove_const_t
Alias template for remove_const.
Definition: type_traits:1583
auto declval() noexcept -> decltype(__declval< _Tp >(0))
Definition: type_traits:2386
constexpr bitset< _Nb > operator&(const bitset< _Nb > &__x, const bitset< _Nb > &__y) noexcept
Global bitwise operations on bitsets.
Definition: bitset:1553
constexpr complex< _Tp > operator-(const complex< _Tp > &__x, const complex< _Tp > &__y)
Return new complex value x minus y.
Definition: complex:365
constexpr tuple< typename __decay_and_strip< _Elements >::__type... > make_tuple(_Elements &&... __args)
Create a tuple containing copies of the arguments.
Definition: tuple:1987
constexpr _Tp reduce(_InputIterator __first, _InputIterator __last, _Tp __init, _BinaryOperation __binary_op)
Calculate reduction of values in a range.
Definition: numeric:287
constexpr _If_is_unsigned_integer< _Tp, int > popcount(_Tp __x) noexcept
The number of bits set in x.
Definition: bit:426
integral_constant< bool, false > false_type
The type used as a compile-time boolean with false value.
Definition: type_traits:85
constexpr std::remove_reference< _Tp >::type && move(_Tp &&__t) noexcept
Convert a value to an rvalue.
Definition: move.h:97
constexpr bitset< _Nb > operator^(const bitset< _Nb > &__x, const bitset< _Nb > &__y) noexcept
Global bitwise operations on bitsets.
Definition: bitset:1573
constexpr bitset< _Nb > operator|(const bitset< _Nb > &__x, const bitset< _Nb > &__y) noexcept
Global bitwise operations on bitsets.
Definition: bitset:1563
typename remove_reference< _Tp >::type remove_reference_t
Alias template for remove_reference.
Definition: type_traits:1640
constexpr complex< _Tp > operator+(const complex< _Tp > &__x, const complex< _Tp > &__y)
Return new complex value x plus y.
Definition: complex:335