Where Online Learning is simpler!

The C and C++ Include Header Files

/usr/include/c++/11/experimental/bits/simd.h


$ cat -n /usr/include/c++/11/experimental/bits/simd.h

     1	// Definition of the public simd interfaces -*- C++ -*-
     2	
     3	// Copyright (C) 2020-2021 Free Software Foundation, Inc.
     4	//
     5	// This file is part of the GNU ISO C++ Library.  This library is free
     6	// software; you can redistribute it and/or modify it under the
     7	// terms of the GNU General Public License as published by the
     8	// Free Software Foundation; either version 3, or (at your option)
     9	// any later version.
    10	
    11	// This library is distributed in the hope that it will be useful,
    12	// but WITHOUT ANY WARRANTY; without even the implied warranty of
    13	// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    14	// GNU General Public License for more details.
    15	
    16	// Under Section 7 of GPL version 3, you are granted additional
    17	// permissions described in the GCC Runtime Library Exception, version
    18	// 3.1, as published by the Free Software Foundation.
    19	
    20	// You should have received a copy of the GNU General Public License and
    21	// a copy of the GCC Runtime Library Exception along with this program;
    22	// see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
    23	// <http://www.gnu.org/licenses/>.
    24	
    25	#ifndef _GLIBCXX_EXPERIMENTAL_SIMD_H
    26	#define _GLIBCXX_EXPERIMENTAL_SIMD_H
    27	
    28	#if __cplusplus >= 201703L
    29	
    30	#include "simd_detail.h"
    31	#include "numeric_traits.h"
    32	#include <bit>
    33	#include <bitset>
    34	#ifdef _GLIBCXX_DEBUG_UB
    35	#include <cstdio> // for stderr
    36	#endif
    37	#include <cstring>
    38	#include <functional>
    39	#include <iosfwd>
    40	#include <utility>
    41	
    42	#if _GLIBCXX_SIMD_X86INTRIN
    43	#include <x86intrin.h>
    44	#elif _GLIBCXX_SIMD_HAVE_NEON
    45	#include <arm_neon.h>
    46	#endif
    47	
    48	/** @ingroup ts_simd
    49	 * @{
    50	 */
    51	/* There are several closely related types, with the following naming
    52	 * convention:
    53	 * _Tp: vectorizable (arithmetic) type (or any type)
    54	 * _TV: __vector_type_t<_Tp, _Np>
    55	 * _TW: _SimdWrapper<_Tp, _Np>
    56	 * _TI: __intrinsic_type_t<_Tp, _Np>
    57	 * _TVT: _VectorTraits<_TV> or _VectorTraits<_TW>
    58	 * If one additional type is needed use _U instead of _T.
    59	 * Otherwise use _T\d, _TV\d, _TW\d, _TI\d, _TVT\d.
    60	 *
    61	 * More naming conventions:
    62	 * _Ap or _Abi: An ABI tag from the simd_abi namespace
    63	 * _Ip: often used for integer types with sizeof(_Ip) == sizeof(_Tp),
    64	 *      _IV, _IW as for _TV, _TW
    65	 * _Np: number of elements (not bytes)
    66	 * _Bytes: number of bytes
    67	 *
    68	 * Variable names:
    69	 * __k: mask object (vector- or bitmask)
    70	 */
    71	_GLIBCXX_SIMD_BEGIN_NAMESPACE
    72	
    73	#if !_GLIBCXX_SIMD_X86INTRIN
      	// Fallback definitions, compiled only when _GLIBCXX_SIMD_X86INTRIN is false.
      	// Each alias is a GNU vector type (__vector_size__ attribute) of 16, 32, or
      	// 64 bytes with float, double, or long long elements.
      	// NOTE(review): presumably <x86intrin.h> supplies these names otherwise.
    74	using __m128  [[__gnu__::__vector_size__(16)]] = float;
    75	using __m128d [[__gnu__::__vector_size__(16)]] = double;
    76	using __m128i [[__gnu__::__vector_size__(16)]] = long long;
    77	using __m256  [[__gnu__::__vector_size__(32)]] = float;
    78	using __m256d [[__gnu__::__vector_size__(32)]] = double;
    79	using __m256i [[__gnu__::__vector_size__(32)]] = long long;
    80	using __m512  [[__gnu__::__vector_size__(64)]] = float;
    81	using __m512d [[__gnu__::__vector_size__(64)]] = double;
    82	using __m512i [[__gnu__::__vector_size__(64)]] = long long;
    83	#endif
    84	
    85	namespace simd_abi {
    86	// simd_abi forward declarations {{{
      	// Only tag types are declared here; their definitions are not part of this
      	// section.
    87	// implementation details:
    88	struct _Scalar;
    89	
    90	template <int _Np>
    91	  struct _Fixed;
    92	
    93	// There are two major ABIs that appear on different architectures.
    94	// Both have non-boolean values packed into an N Byte register
    95	// -> #elements = N / sizeof(T)
    96	// Masks differ:
    97	// 1. Use value vector registers for masks (all 0 or all 1)
    98	// 2. Use bitmasks (mask registers) with one bit per value in the corresponding
    99	//    value vector
   100	//
   101	// Both can be partially used, masking off the rest when doing horizontal
   102	// operations or operations that can trap (e.g. FP_INVALID or integer division
   103	// by 0). This is encoded as the number of used bytes.
   104	template <int _UsedBytes>
   105	  struct _VecBuiltin;
   106	
   107	template <int _UsedBytes>
   108	  struct _VecBltnBtmsk;
   109	
      	// _VecBuiltin tag whose used byte count is exactly _Np elements of _Tp.
   110	template <typename _Tp, int _Np>
   111	  using _VecN = _VecBuiltin<sizeof(_Tp) * _Np>;
   112	
      	// Architecture-named aliases. The default argument is the byte count used
      	// when none is given (16, 32, 64, 16). _Sse, _Avx and _Neon all name
      	// _VecBuiltin; only _Avx512 names _VecBltnBtmsk.
   113	template <int _UsedBytes = 16>
   114	  using _Sse = _VecBuiltin<_UsedBytes>;
   115	
   116	template <int _UsedBytes = 32>
   117	  using _Avx = _VecBuiltin<_UsedBytes>;
   118	
   119	template <int _UsedBytes = 64>
   120	  using _Avx512 = _VecBltnBtmsk<_UsedBytes>;
   121	
   122	template <int _UsedBytes = 16>
   123	  using _Neon = _VecBuiltin<_UsedBytes>;
   124	
   125	// implementation-defined:
      	// The aliases above with their default byte counts; __neon and __neon128 are
      	// the same type (_Neon<16>), __neon64 uses 8 bytes.
   126	using __sse = _Sse<>;
   127	using __avx = _Avx<>;
   128	using __avx512 = _Avx512<>;
   129	using __neon = _Neon<>;
   130	using __neon128 = _Neon<16>;
   131	using __neon64 = _Neon<8>;
   132	
   133	// standard:
   134	template <typename _Tp, size_t _Np, typename...>
   135	  struct deduce;
   136	
      	// fixed_size<_Np> and scalar are aliases of the internal tags _Fixed<_Np>
      	// and _Scalar.
   137	template <int _Np>
   138	  using fixed_size = _Fixed<_Np>;
   139	
   140	using scalar = _Scalar;
   141	
   142	// }}}
   143	} // namespace simd_abi
   144	// forward declarations is_simd(_mask), simd(_mask), simd_size {{{
   145	template <typename _Tp>
   146	  struct is_simd;
   147	
   148	template <typename _Tp>
   149	  struct is_simd_mask;
   150	
   151	template <typename _Tp, typename _Abi>
   152	  class simd;
   153	
   154	template <typename _Tp, typename _Abi>
   155	  class simd_mask;
   156	
   157	template <typename _Tp, typename _Abi>
   158	  struct simd_size;
   159	
   160	// }}}
   161	// load/store flags {{{
      	// Each tag type exposes the same two members:
      	//   _S_alignment<_Tp, _Up>   - an alignment value (size_t); _Up defaults to
      	//                              _Tp::value_type
      	//   _S_apply<_Tp, _Up>(ptr)  - returns ptr, possibly annotated with an
      	//                              alignment assumption
      	//
      	// element_aligned_tag: alignment is alignof(_Up); _S_apply returns the
      	// pointer unchanged.
   162	struct element_aligned_tag
   163	{
   164	  template <typename _Tp, typename _Up = typename _Tp::value_type>
   165	    static constexpr size_t _S_alignment = alignof(_Up);
   166	
   167	  template <typename _Tp, typename _Up>
   168	    _GLIBCXX_SIMD_INTRINSIC static constexpr _Up*
   169	    _S_apply(_Up* __ptr)
   170	    { return __ptr; }
   171	};
   172	
      	// vector_aligned_tag: alignment is std::__bit_ceil(sizeof(_Up) * _Tp::size());
      	// _S_apply passes the pointer through __builtin_assume_aligned with that
      	// value.
   173	struct vector_aligned_tag
   174	{
   175	  template <typename _Tp, typename _Up = typename _Tp::value_type>
   176	    static constexpr size_t _S_alignment
   177	      = std::__bit_ceil(sizeof(_Up) * _Tp::size());
   178	
   179	  template <typename _Tp, typename _Up>
   180	    _GLIBCXX_SIMD_INTRINSIC static constexpr _Up*
   181	    _S_apply(_Up* __ptr)
   182	    { return static_cast<_Up*>(__builtin_assume_aligned(__ptr, _S_alignment<_Tp, _Up>)); }
   183	};
   184	
      	// overaligned_tag<_Np>: alignment is _Np regardless of _Tp and _Up; _S_apply
      	// passes the pointer through __builtin_assume_aligned(__ptr, _Np).
   185	template <size_t _Np> struct overaligned_tag
   186	{
   187	  template <typename _Tp, typename _Up = typename _Tp::value_type>
   188	    static constexpr size_t _S_alignment = _Np;
   189	
   190	  template <typename _Tp, typename _Up>
   191	    _GLIBCXX_SIMD_INTRINSIC static constexpr _Up*
   192	    _S_apply(_Up* __ptr)
   193	    { return static_cast<_Up*>(__builtin_assume_aligned(__ptr, _Np)); }
   194	};
   195	
      	// Value-initialized objects of the three tag types.
   196	inline constexpr element_aligned_tag element_aligned = {};
   197	
   198	inline constexpr vector_aligned_tag vector_aligned = {};
   199	
   200	template <size_t _Np>
   201	  inline constexpr overaligned_tag<_Np> overaligned = {};
   202	
   203	// }}}
      	// integral_constant carrying a size_t value.
   204	template <size_t _Xp>
   205	  using _SizeConstant = integral_constant<size_t, _Xp>;
   206	
   207	namespace __detail
   208	{
      	  // Function object returning min(__a, __b) for two values of the same type.
      	  // The call is unqualified after `using std::min`, so std::min is a candidate
      	  // in addition to any min found by argument-dependent lookup.
   209	  struct _Minimum
   210	  {
   211	    template <typename _Tp>
   212	      _GLIBCXX_SIMD_INTRINSIC constexpr
   213	      _Tp
   214	      operator()(_Tp __a, _Tp __b) const
   215	      {
   216		using std::min;
   217		return min(__a, __b);
   218	      }
   219	  };
   220	
      	  // Counterpart of _Minimum: returns max(__a, __b), with the same lookup rules.
   221	  struct _Maximum
   222	  {
   223	    template <typename _Tp>
   224	      _GLIBCXX_SIMD_INTRINSIC constexpr
   225	      _Tp
   226	      operator()(_Tp __a, _Tp __b) const
   227	      {
   228		using std::max;
   229		return max(__a, __b);
   230	      }
   231	  };
   232	} // namespace __detail
   233	
   234	// unrolled/pack execution helpers
   235	// __execute_n_times{{{
      	// Calls __f(_SizeConstant<_I>()) once for every index _I of the sequence,
      	// using a fold over the comma operator; each result is cast to void.
   236	template <typename _Fp, size_t... _I>
   237	  _GLIBCXX_SIMD_INTRINSIC constexpr void
   238	  __execute_on_index_sequence(_Fp&& __f, index_sequence<_I...>)
   239	  { ((void)__f(_SizeConstant<_I>()), ...); }
   240	
      	// Overload for the empty index sequence: does nothing and never calls the
      	// function object.
   241	template <typename _Fp>
   242	  _GLIBCXX_SIMD_INTRINSIC constexpr void
   243	  __execute_on_index_sequence(_Fp&&, index_sequence<>)
   244	  { }
   245	
      	// Entry point: forwards __f to __execute_on_index_sequence together with
      	// make_index_sequence<_Np>, i.e. the indices 0 .. _Np-1.
   246	template <size_t _Np, typename _Fp>
   247	  _GLIBCXX_SIMD_INTRINSIC constexpr void
   248	  __execute_n_times(_Fp&& __f)
   249	  {
   250	    __execute_on_index_sequence(static_cast<_Fp&&>(__f),
   251					make_index_sequence<_Np>{});
   252	  }
   253	
   254	// }}}
   255	// __generate_from_n_evaluations{{{
      	// Returns an _R list-initialized from the results of
      	// __f(_SizeConstant<_I>()) for every index _I of the sequence.
   256	template <typename _R, typename _Fp, size_t... _I>
   257	  _GLIBCXX_SIMD_INTRINSIC constexpr _R
   258	  __execute_on_index_sequence_with_return(_Fp&& __f, index_sequence<_I...>)
   259	  { return _R{__f(_SizeConstant<_I>())...}; }
   260	
      	// Entry point: builds an _R from _Np evaluations of __f, with the indices
      	// 0 .. _Np-1 supplied by make_index_sequence<_Np>.
   261	template <size_t _Np, typename _R, typename _Fp>
   262	  _GLIBCXX_SIMD_INTRINSIC constexpr _R
   263	  __generate_from_n_evaluations(_Fp&& __f)
   264	  {
   265	    return __execute_on_index_sequence_with_return<_R>(
   266	      static_cast<_Fp&&>(__f), make_index_sequence<_Np>{});
   267	  }
   268	
   269	// }}}
   270	// __call_with_n_evaluations{{{
      	// Calls __f0 once; its arguments are the results of
      	// __fargs(_SizeConstant<_I>()) for every index _I of the sequence. Returns
      	// whatever __f0 returns (by value, since the return type is plain auto).
   271	template <size_t... _I, typename _F0, typename _FArgs>
   272	  _GLIBCXX_SIMD_INTRINSIC constexpr auto
   273	  __call_with_n_evaluations(index_sequence<_I...>, _F0&& __f0, _FArgs&& __fargs)
   274	  { return __f0(__fargs(_SizeConstant<_I>())...); }
   275	
      	// Entry point: same as above with the indices 0 .. _Np-1.
   276	template <size_t _Np, typename _F0, typename _FArgs>
   277	  _GLIBCXX_SIMD_INTRINSIC constexpr auto
   278	  __call_with_n_evaluations(_F0&& __f0, _FArgs&& __fargs)
   279	  {
   280	    return __call_with_n_evaluations(make_index_sequence<_Np>{},
   281					     static_cast<_F0&&>(__f0),
   282					     static_cast<_FArgs&&>(__fargs));
   283	  }
   284	
   285	// }}}
   286	// __call_with_subscripts{{{
      	// Calls __fun once with the arguments __x[_First + _It] for every index _It
      	// of the sequence.
   287	template <size_t _First = 0, size_t... _It, typename _Tp, typename _Fp>
   288	  _GLIBCXX_SIMD_INTRINSIC constexpr auto
   289	  __call_with_subscripts(_Tp&& __x, index_sequence<_It...>, _Fp&& __fun)
   290	  { return __fun(__x[_First + _It]...); }
   291	
      	// Entry point: passes the _Np consecutive elements
      	// __x[_First] .. __x[_First + _Np - 1] to __fun.
   292	template <size_t _Np, size_t _First = 0, typename _Tp, typename _Fp>
   293	  _GLIBCXX_SIMD_INTRINSIC constexpr auto
   294	  __call_with_subscripts(_Tp&& __x, _Fp&& __fun)
   295	  {
   296	    return __call_with_subscripts<_First>(static_cast<_Tp&&>(__x),
   297						  make_index_sequence<_Np>(),
   298						  static_cast<_Fp&&>(__fun));
   299	  }
   300	
   301	// }}}
   302	
   303	// vvv ---- type traits ---- vvv
   304	// integer type aliases{{{
      	// Short aliases for built-in integer types.
   305	using _UChar = unsigned char;
   306	using _SChar = signed char;
   307	using _UShort = unsigned short;
   308	using _UInt = unsigned int;
   309	using _ULong = unsigned long;
   310	using _ULLong = unsigned long long;
   311	using _LLong = long long;
   312	
   313	//}}}
   314	// __first_of_pack{{{
      	// ::type is the first type of the argument list; the remaining types are
      	// ignored. An empty argument list does not match the template.
   315	template <typename _T0, typename...>
   316	  struct __first_of_pack
   317	  { using type = _T0; };
   318	
   319	template <typename... _Ts>
   320	  using __first_of_pack_t = typename __first_of_pack<_Ts...>::type;
   321	
   322	//}}}
   323	// __value_type_or_identity_t {{{
      	// Declaration-only overload pair, used solely inside decltype below.
      	// The (int) overload exists only when _Tp::value_type is a valid type.
   324	template <typename _Tp>
   325	  typename _Tp::value_type
   326	  __value_type_or_identity_impl(int);
   327	
      	// Fallback overload: needs an int -> float conversion, so it is selected only
      	// when the (int) overload has been removed by SFINAE.
   328	template <typename _Tp>
   329	  _Tp
   330	  __value_type_or_identity_impl(float);
   331	
      	// _Tp::value_type if that member type exists, otherwise _Tp itself.
   332	template <typename _Tp>
   333	  using __value_type_or_identity_t
   334	    = decltype(__value_type_or_identity_impl<_Tp>(int()));
   335	
   336	// }}}
   337	// __is_vectorizable {{{
      	// True for every type for which is_arithmetic is true, except bool (see the
      	// explicit specialization below).
   338	template <typename _Tp>
   339	  struct __is_vectorizable : public is_arithmetic<_Tp> {};
   340	
   341	template <>
   342	  struct __is_vectorizable<bool> : public false_type {};
   343	
   344	template <typename _Tp>
   345	  inline constexpr bool __is_vectorizable_v = __is_vectorizable<_Tp>::value;
   346	
   347	// Deduces to a vectorizable type
      	// (an alias of _Tp that is ill-formed, and therefore SFINAE-friendly, when
      	// __is_vectorizable_v<_Tp> is false)
   348	template <typename _Tp, typename = enable_if_t<__is_vectorizable_v<_Tp>>>
   349	  using _Vectorizable = _Tp;
   350	
   351	// }}}
   352	// _LoadStorePtr / __is_possible_loadstore_conversion {{{
      	// True when both _Ptr and _ValueType are vectorizable, plus the special case
      	// <bool, bool> handled by the explicit specialization below.
   353	template <typename _Ptr, typename _ValueType>
   354	  struct __is_possible_loadstore_conversion
   355	  : conjunction<__is_vectorizable<_Ptr>, __is_vectorizable<_ValueType>> {};
   356	
   357	template <>
   358	  struct __is_possible_loadstore_conversion<bool, bool> : true_type {};
   359	
   360	// Deduces to a type allowed for load/store with the given value type.
      	// (an alias of _Ptr that is ill-formed when the trait above is false)
   361	template <typename _Ptr, typename _ValueType,
   362		  typename = enable_if_t<
   363		    __is_possible_loadstore_conversion<_Ptr, _ValueType>::value>>
   364	  using _LoadStorePtr = _Ptr;
   365	
   366	// }}}
   367	// __is_bitmask{{{
      	// Primary template: false unless the partial specialization below applies.
   368	template <typename _Tp, typename = void_t<>>
   369	  struct __is_bitmask : false_type {};
   370	
   371	template <typename _Tp>
   372	  inline constexpr bool __is_bitmask_v = __is_bitmask<_Tp>::value;
   373	
   374	// the __mmaskXX case:
      	// true when the expression `_Tp & 1u` is valid and its result can be assigned
      	// to an `unsigned` lvalue.
   375	template <typename _Tp>
   376	  struct __is_bitmask<_Tp,
   377	    void_t<decltype(declval<unsigned&>() = declval<_Tp>() & 1u)>>
   378	  : true_type {};
   379	
   380	// }}}
   381	// __int_for_sizeof{{{
      	// Returns a value-initialized object whose type is selected by _Bytes; the
      	// function exists so that decltype can name that type (see the aliases
      	// after it). The candidates are tested in this order and the first match
      	// wins:
      	//   int; char (clang) or signed char (otherwise); short; long (non-clang
      	//   only); long long; __int128 (when __SIZEOF_INT128__ is defined);
      	//   and finally, for any _Bytes that is a multiple of sizeof(int), a local
      	//   struct wrapping int[_Bytes / sizeof(int)].
      	// Any other _Bytes hits the static_assert in the last branch.
      	// NOTE(review): -Wpedantic is presumably silenced because of __int128 -
      	// confirm.
   382	#pragma GCC diagnostic push
   383	#pragma GCC diagnostic ignored "-Wpedantic"
   384	template <size_t _Bytes>
   385	  constexpr auto
   386	  __int_for_sizeof()
   387	  {
   388	    if constexpr (_Bytes == sizeof(int))
   389	      return int();
   390	  #ifdef __clang__
   391	    else if constexpr (_Bytes == sizeof(char))
   392	      return char();
   393	  #else
   394	    else if constexpr (_Bytes == sizeof(_SChar))
   395	      return _SChar();
   396	  #endif
   397	    else if constexpr (_Bytes == sizeof(short))
   398	      return short();
   399	  #ifndef __clang__
   400	    else if constexpr (_Bytes == sizeof(long))
   401	      return long();
   402	  #endif
   403	    else if constexpr (_Bytes == sizeof(_LLong))
   404	      return _LLong();
   405	  #ifdef __SIZEOF_INT128__
   406	    else if constexpr (_Bytes == sizeof(__int128))
   407	      return __int128();
   408	  #endif // __SIZEOF_INT128__
   409	    else if constexpr (_Bytes % sizeof(int) == 0)
   410	      {
      		// Aggregate of _Np ints providing element-wise &, |, ^ and ~; each
      		// operator builds its result with __generate_from_n_evaluations.
   411		constexpr size_t _Np = _Bytes / sizeof(int);
   412		struct _Ip
   413		{
   414		  int _M_data[_Np];
   415	
   416		  _GLIBCXX_SIMD_INTRINSIC constexpr _Ip
   417		  operator&(_Ip __rhs) const
   418		  {
   419		    return __generate_from_n_evaluations<_Np, _Ip>(
   420		      [&](auto __i) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
   421			return __rhs._M_data[__i] & _M_data[__i];
   422		      });
   423		  }
   424	
   425		  _GLIBCXX_SIMD_INTRINSIC constexpr _Ip
   426		  operator|(_Ip __rhs) const
   427		  {
   428		    return __generate_from_n_evaluations<_Np, _Ip>(
   429		      [&](auto __i) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
   430			return __rhs._M_data[__i] | _M_data[__i];
   431		      });
   432		  }
   433	
   434		  _GLIBCXX_SIMD_INTRINSIC constexpr _Ip
   435		  operator^(_Ip __rhs) const
   436		  {
   437		    return __generate_from_n_evaluations<_Np, _Ip>(
   438		      [&](auto __i) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
   439			return __rhs._M_data[__i] ^ _M_data[__i];
   440		      });
   441		  }
   442	
   443		  _GLIBCXX_SIMD_INTRINSIC constexpr _Ip
   444		  operator~() const
   445		  {
   446		    return __generate_from_n_evaluations<_Np, _Ip>(
   447		      [&](auto __i) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA { return ~_M_data[__i]; });
   448		  }
   449		};
   450		return _Ip{};
   451	      }
   452	    else
      	      // `_Bytes != _Bytes` is always false but depends on the template
      	      // parameter.
   453	      static_assert(_Bytes != _Bytes, "this should be unreachable");
   454	  }
   455	#pragma GCC diagnostic pop
   456	
      	// Type selected by __int_for_sizeof for sizeof(_Tp) bytes.
   457	template <typename _Tp>
   458	  using __int_for_sizeof_t = decltype(__int_for_sizeof<sizeof(_Tp)>());
   459	
      	// Type selected by __int_for_sizeof for an explicit byte count _Np.
   460	template <size_t _Np>
   461	  using __int_with_sizeof_t = decltype(__int_for_sizeof<_Np>());
   462	
   463	// }}}
   464	// __is_fixed_size_abi{{{
      	// True only for simd_abi::fixed_size<_Np> (any _Np); false for every other
      	// type.
   465	template <typename _Tp>
   466	  struct __is_fixed_size_abi : false_type {};
   467	
   468	template <int _Np>
   469	  struct __is_fixed_size_abi<simd_abi::fixed_size<_Np>> : true_type {};
   470	
   471	template <typename _Tp>
   472	  inline constexpr bool __is_fixed_size_abi_v = __is_fixed_size_abi<_Tp>::value;
   473	
   474	// }}}
   475	// constexpr feature detection{{{
      	// Each constant below copies the value of one _GLIBCXX_SIMD_HAVE_* macro into
      	// a constexpr bool.
      	// NOTE(review): the macros are presumably defined by "simd_detail.h" -
      	// confirm.
   476	constexpr inline bool __have_mmx = _GLIBCXX_SIMD_HAVE_MMX;
   477	constexpr inline bool __have_sse = _GLIBCXX_SIMD_HAVE_SSE;
   478	constexpr inline bool __have_sse2 = _GLIBCXX_SIMD_HAVE_SSE2;
   479	constexpr inline bool __have_sse3 = _GLIBCXX_SIMD_HAVE_SSE3;
   480	constexpr inline bool __have_ssse3 = _GLIBCXX_SIMD_HAVE_SSSE3;
   481	constexpr inline bool __have_sse4_1 = _GLIBCXX_SIMD_HAVE_SSE4_1;
   482	constexpr inline bool __have_sse4_2 = _GLIBCXX_SIMD_HAVE_SSE4_2;
   483	constexpr inline bool __have_xop = _GLIBCXX_SIMD_HAVE_XOP;
   484	constexpr inline bool __have_avx = _GLIBCXX_SIMD_HAVE_AVX;
   485	constexpr inline bool __have_avx2 = _GLIBCXX_SIMD_HAVE_AVX2;
   486	constexpr inline bool __have_bmi = _GLIBCXX_SIMD_HAVE_BMI1;
   487	constexpr inline bool __have_bmi2 = _GLIBCXX_SIMD_HAVE_BMI2;
   488	constexpr inline bool __have_lzcnt = _GLIBCXX_SIMD_HAVE_LZCNT;
   489	constexpr inline bool __have_sse4a = _GLIBCXX_SIMD_HAVE_SSE4A;
   490	constexpr inline bool __have_fma = _GLIBCXX_SIMD_HAVE_FMA;
   491	constexpr inline bool __have_fma4 = _GLIBCXX_SIMD_HAVE_FMA4;
   492	constexpr inline bool __have_f16c = _GLIBCXX_SIMD_HAVE_F16C;
   493	constexpr inline bool __have_popcnt = _GLIBCXX_SIMD_HAVE_POPCNT;
   494	constexpr inline bool __have_avx512f = _GLIBCXX_SIMD_HAVE_AVX512F;
   495	constexpr inline bool __have_avx512dq = _GLIBCXX_SIMD_HAVE_AVX512DQ;
   496	constexpr inline bool __have_avx512vl = _GLIBCXX_SIMD_HAVE_AVX512VL;
   497	constexpr inline bool __have_avx512bw = _GLIBCXX_SIMD_HAVE_AVX512BW;
      	// Combined flags: true only when both named AVX-512 extensions are available.
   498	constexpr inline bool __have_avx512dq_vl = __have_avx512dq && __have_avx512vl;
   499	constexpr inline bool __have_avx512bw_vl = __have_avx512bw && __have_avx512vl;
   500	
   501	constexpr inline bool __have_neon = _GLIBCXX_SIMD_HAVE_NEON;
   502	constexpr inline bool __have_neon_a32 = _GLIBCXX_SIMD_HAVE_NEON_A32;
   503	constexpr inline bool __have_neon_a64 = _GLIBCXX_SIMD_HAVE_NEON_A64;
      	// If __GCC_IEC_559 is defined, the value is (__GCC_IEC_559 == 0). Otherwise
      	// it is true exactly when __FAST_MATH__ is defined.
   504	constexpr inline bool __support_neon_float =
   505	#if defined __GCC_IEC_559
   506	  __GCC_IEC_559 == 0;
   507	#elif defined __FAST_MATH__
   508	  true;
   509	#else
   510	  false;
   511	#endif
   512	
      	// POWER flags. From power9vec downwards each flag is true when its own macro
      	// is defined, and otherwise inherits the value of the next newer level:
      	//   power8vec <- power9vec, power_vsx <- power8vec, power_vmx <- power_vsx.
      	// NOTE(review): __have_power10vec is not part of that chain; _ARCH_PWR10
      	// alone does not set __have_power9vec.
   513	#ifdef _ARCH_PWR10
   514	constexpr inline bool __have_power10vec = true;
   515	#else
   516	constexpr inline bool __have_power10vec = false;
   517	#endif
   518	#ifdef __POWER9_VECTOR__
   519	constexpr inline bool __have_power9vec = true;
   520	#else
   521	constexpr inline bool __have_power9vec = false;
   522	#endif
   523	#if defined __POWER8_VECTOR__
   524	constexpr inline bool __have_power8vec = true;
   525	#else
   526	constexpr inline bool __have_power8vec = __have_power9vec;
   527	#endif
   528	#if defined __VSX__
   529	constexpr inline bool __have_power_vsx = true;
   530	#else
   531	constexpr inline bool __have_power_vsx = __have_power8vec;
   532	#endif
   533	#if defined __ALTIVEC__
   534	constexpr inline bool __have_power_vmx = true;
   535	#else
   536	constexpr inline bool __have_power_vmx = __have_power_vsx;
   537	#endif
   538	
   539	// }}}
   540	// __is_scalar_abi {{{
      	// Returns true exactly when _Abi is simd_abi::scalar.
   541	template <typename _Abi>
   542	  constexpr bool
   543	  __is_scalar_abi()
   544	  { return is_same_v<simd_abi::scalar, _Abi>; }
   545	
   546	// }}}
   547	// __abi_bytes_v {{{
      	// Overload for pointers to a specialization of a template taking one int
      	// parameter: returns that int argument. The pointer value itself is unused.
   548	template <template <int> class _Abi, int _Bytes>
   549	  constexpr int
   550	  __abi_bytes_impl(_Abi<_Bytes>*)
   551	  { return _Bytes; }
   552	
      	// Generic overload: returns -1.
   553	template <typename _Tp>
   554	  constexpr int
   555	  __abi_bytes_impl(_Tp*)
   556	  { return -1; }
   557	
      	// Result of calling __abi_bytes_impl with a null _Abi*: the int template
      	// argument of _Abi, or -1 when _Abi does not have the form
      	// Template<int>.
   558	template <typename _Abi>
   559	  inline constexpr int __abi_bytes_v
   560	    = __abi_bytes_impl(static_cast<_Abi*>(nullptr));
   561	
   562	// }}}
   563	// __is_builtin_bitmask_abi {{{
      	// Returns true when _Abi is simd_abi::_VecBltnBtmsk<N> with
      	// N == __abi_bytes_v<_Abi>; no bound on N is checked here.
   564	template <typename _Abi>
   565	  constexpr bool
   566	  __is_builtin_bitmask_abi()
   567	  { return is_same_v<simd_abi::_VecBltnBtmsk<__abi_bytes_v<_Abi>>, _Abi>; }
   568	
   569	// }}}
   570	// __is_sse_abi {{{
      	// Returns true when _Abi is simd_abi::_VecBuiltin<_Bytes> with _Bytes <= 16.
   571	template <typename _Abi>
   572	  constexpr bool
   573	  __is_sse_abi()
   574	  {
   575	    constexpr auto _Bytes = __abi_bytes_v<_Abi>;
   576	    return _Bytes <= 16 && is_same_v<simd_abi::_VecBuiltin<_Bytes>, _Abi>;
   577	  }
   578	
   579	// }}}
   580	// __is_avx_abi {{{
      	// Returns true when _Abi is simd_abi::_VecBuiltin<_Bytes> with
      	// 16 < _Bytes <= 32.
   581	template <typename _Abi>
   582	  constexpr bool
   583	  __is_avx_abi()
   584	  {
   585	    constexpr auto _Bytes = __abi_bytes_v<_Abi>;
   586	    return _Bytes > 16 && _Bytes <= 32
   587		   && is_same_v<simd_abi::_VecBuiltin<_Bytes>, _Abi>;
   588	  }
   589	
   590	// }}}
   591	// __is_avx512_abi {{{
      	// Returns true when _Abi is simd_abi::_Avx512<_Bytes> (an alias of
      	// _VecBltnBtmsk<_Bytes>) with _Bytes <= 64. Unlike __is_avx_abi there is no
      	// lower bound on _Bytes.
   592	template <typename _Abi>
   593	  constexpr bool
   594	  __is_avx512_abi()
   595	  {
   596	    constexpr auto _Bytes = __abi_bytes_v<_Abi>;
   597	    return _Bytes <= 64 && is_same_v<simd_abi::_Avx512<_Bytes>, _Abi>;
   598	  }
   599	
   600	// }}}
   601	// __is_neon_abi {{{
      	// Returns true when _Abi is simd_abi::_VecBuiltin<_Bytes> with _Bytes <= 16.
      	// NOTE(review): this is the same condition as __is_sse_abi; callers
      	// presumably tell the two apart via the __have_* target flags - confirm.
   602	template <typename _Abi>
   603	  constexpr bool
   604	  __is_neon_abi()
   605	  {
   606	    constexpr auto _Bytes = __abi_bytes_v<_Abi>;
   607	    return _Bytes <= 16 && is_same_v<simd_abi::_VecBuiltin<_Bytes>, _Abi>;
   608	  }
   609	
   610	// }}}
   611	// __make_dependent_t {{{
      	// ::type is always the second argument (_Up); the first argument is unused
      	// except that it makes the resulting type name depend on it.
   612	template <typename, typename _Up>
   613	  struct __make_dependent
   614	  { using type = _Up; };
   615	
   616	template <typename _Tp, typename _Up>
   617	  using __make_dependent_t = typename __make_dependent<_Tp, _Up>::type;
   618	
   619	// }}}
   620	// ^^^ ---- type traits ---- ^^^
   621	
   622	// __invoke_ub{{{
      	// Never returns. With _GLIBCXX_DEBUG_UB defined, __msg is used as a printf
      	// format string for __args, written to stderr, and the program then traps.
      	// Without it the call is __builtin_unreachable() and both parameters are
      	// unused (hence [[maybe_unused]]).
   623	template <typename... _Args>
   624	  [[noreturn]] _GLIBCXX_SIMD_ALWAYS_INLINE void
   625	  __invoke_ub([[maybe_unused]] const char* __msg, [[maybe_unused]] const _Args&... __args)
   626	  {
   627	#ifdef _GLIBCXX_DEBUG_UB
   628	    __builtin_fprintf(stderr, __msg, __args...);
   629	    __builtin_trap();
   630	#else
   631	    __builtin_unreachable();
   632	#endif
   633	  }
   634	
   635	// }}}
   636	// __assert_unreachable{{{
      	// Instantiating this template fails the static_assert: the condition
      	// !is_same_v<_Tp, _Tp> is false for every _Tp but depends on _Tp, so it is
      	// only checked on instantiation.
   637	template <typename _Tp>
   638	  struct __assert_unreachable
   639	  { static_assert(!is_same_v<_Tp, _Tp>, "this should be unreachable"); };
   640	
   641	// }}}
   642	// __size_or_zero_v {{{
      	// Preferred overload (exact match for the int argument). It exists only when
      	// simd_size<_Tp, _Ap>::value is well-formed and returns that value.
   643	template <typename _Tp, typename _Ap, size_t _Np = simd_size<_Tp, _Ap>::value>
   644	  constexpr size_t
   645	  __size_or_zero_dispatch(int)
   646	  { return _Np; }
   647	
      	// Fallback overload, chosen when the one above is removed by SFINAE:
      	// returns 0.
   648	template <typename _Tp, typename _Ap>
   649	  constexpr size_t
   650	  __size_or_zero_dispatch(float)
   651	  { return 0; }
   652	
      	// simd_size<_Tp, _Ap>::value if available, otherwise 0.
   653	template <typename _Tp, typename _Ap>
   654	  inline constexpr size_t __size_or_zero_v
   655	     = __size_or_zero_dispatch<_Tp, _Ap>(0);
   656	
   657	// }}}
   658	// __div_roundup {{{
      	// Returns __a / __b rounded up to the next integer.
      	// NOTE(review): __a + __b - 1 is evaluated in size_t and wraps around for
      	// very large operands; __b == 0 divides by zero. Neither case is checked.
   659	inline constexpr size_t
   660	__div_roundup(size_t __a, size_t __b)
   661	{ return (__a + __b - 1) / __b; }
   662	
   663	// }}}
   664	// _ExactBool{{{
      	// Wrapper around a single const bool that converts implicitly from and to
      	// bool. The constructor taking int is deleted, so passing an int where an
      	// _ExactBool is expected is ill-formed instead of being converted to bool.
   665	class _ExactBool
   666	{
   667	  const bool _M_data;
   668	
   669	public:
   670	  _GLIBCXX_SIMD_INTRINSIC constexpr
   671	  _ExactBool(bool __b) : _M_data(__b) {}
   672	
   673	  _ExactBool(int) = delete;
   674	
   675	  _GLIBCXX_SIMD_INTRINSIC constexpr
   676	  operator bool() const
   677	  { return _M_data; }
   678	};
   679	
   680	// }}}
   681	// __may_alias{{{
   682	/**@internal
   683	 * Helper __may_alias<_Tp> that turns _Tp into the type to be used for an
   684	 * aliasing pointer. This adds the __may_alias attribute to _Tp (with compilers
   685	 * that support it).
      	 *
      	 * The attribute is spelled [[__gnu__::__may_alias__]] and is applied to the
      	 * alias declaration itself.
   686	 */
   687	template <typename _Tp>
   688	  using __may_alias [[__gnu__::__may_alias__]] = _Tp;
   689	
   690	// }}}
   691	// _UnsupportedBase {{{
   692	// simd and simd_mask base for unsupported <_Tp, _Abi>
      	// The default constructor, copy constructor, copy assignment operator and
      	// destructor are all deleted.
   693	struct _UnsupportedBase
   694	{
   695	  _UnsupportedBase() = delete;
   696	  _UnsupportedBase(const _UnsupportedBase&) = delete;
   697	  _UnsupportedBase& operator=(const _UnsupportedBase&) = delete;
   698	  ~_UnsupportedBase() = delete;
   699	};
   700	
   701	// }}}
   702	// _InvalidTraits {{{
   703	/**
   704	 * @internal
   705	 * Defines the implementation of a given <_Tp, _Abi>.
   706	 *
   707	 * Implementations must ensure that only valid <_Tp, _Abi> instantiations are
   708	 * possible. Static assertions in the type definition do not suffice. It is
   709	 * important that SFINAE works.
      	 *
      	 * _InvalidTraits itself is the "not valid" case: _IsValid is false_type, both
      	 * base types are _UnsupportedBase, the full size is 0, both alignments are 1,
      	 * the member types are empty structs, and the Impl/CastType types are
      	 * declared but not defined.
   710	 */
   711	struct _InvalidTraits
   712	{
   713	  using _IsValid = false_type;
   714	  using _SimdBase = _UnsupportedBase;
   715	  using _MaskBase = _UnsupportedBase;
   716	
   717	  static constexpr size_t _S_full_size = 0;
   718	  static constexpr bool _S_is_partial = false;
   719	
   720	  static constexpr size_t _S_simd_align = 1;
   721	  struct _SimdImpl;
   722	  struct _SimdMember {};
   723	  struct _SimdCastType;
   724	
   725	  static constexpr size_t _S_mask_align = 1;
   726	  struct _MaskImpl;
   727	  struct _MaskMember {};
   728	  struct _MaskCastType;
   729	};
   730	
   731	// }}}
   732	// _SimdTraits {{{
      	// Primary template: every <_Tp, _Abi> combination inherits _InvalidTraits.
      	// The third parameter (defaulting to void_t<>) leaves room for constrained
      	// partial specializations.
      	// NOTE(review): those specializations are presumably defined elsewhere -
      	// confirm.
   733	template <typename _Tp, typename _Abi, typename = void_t<>>
   734	  struct _SimdTraits : _InvalidTraits {};
   735	
   736	// }}}
   737	// __private_init, __bitset_init{{{
   738	/**
   739	 * @internal
   740	 * Tag used for private init constructor of simd and simd_mask
   741	 */
   742	inline constexpr struct _PrivateInit {} __private_init = {};
   743	
      	// Second empty tag type with its constant object, declared the same way.
   744	inline constexpr struct _BitsetInit {} __bitset_init = {};
   745	
   746	// }}}
   747	// __is_narrowing_conversion<_From, _To>{{{
      	// Primary template (declared only). The two defaulted bool parameters record
      	// whether _From and _To are arithmetic and select a specialization below.
      	// No specialization is visible here for a non-arithmetic _To.
   748	template <typename _From, typename _To, bool = is_arithmetic_v<_From>,
   749		  bool = is_arithmetic_v<_To>>
   750	  struct __is_narrowing_conversion;
   751	
   752	// ignore "signed/unsigned mismatch" in the following trait.
   753	// The implicit conversions will do the right thing here.
      	// Both types arithmetic: narrowing when _From has more digits, a larger
      	// finite maximum or a smaller finite minimum than _To, or when _From is
      	// signed and _To is unsigned.
   754	template <typename _From, typename _To>
   755	  struct __is_narrowing_conversion<_From, _To, true, true>
   756	  : public __bool_constant<(
   757	      __digits_v<_From> > __digits_v<_To>
   758	      || __finite_max_v<_From> > __finite_max_v<_To>
   759	      || __finite_min_v<_From> < __finite_min_v<_To>
   760	      || (is_signed_v<_From> && is_unsigned_v<_To>))> {};
   761	
      	// Any arithmetic type to bool counts as narrowing ...
   762	template <typename _Tp>
   763	  struct __is_narrowing_conversion<_Tp, bool, true, true>
   764	  : public true_type {};
   765	
      	// ... except bool to bool. Both <_Tp, bool> and <_Tp, _Tp> would match
      	// <bool, bool>, so this case has its own full specialization.
   766	template <>
   767	  struct __is_narrowing_conversion<bool, bool, true, true>
   768	  : public false_type {};
   769	
      	// Identical source and target type: never narrowing.
   770	template <typename _Tp>
   771	  struct __is_narrowing_conversion<_Tp, _Tp, true, true>
   772	  : public false_type {};
   773	
      	// Non-arithmetic _From to arithmetic _To: treated as narrowing exactly when
      	// _From is not convertible to _To.
   774	template <typename _From, typename _To>
   775	  struct __is_narrowing_conversion<_From, _To, false, true>
   776	  : public negation<is_convertible<_From, _To>> {};
   777	
   778	// }}}
   779	// __converts_to_higher_integer_rank{{{
      	// Primary template: true. It is used when sizeof(_From) < sizeof(_To),
      	// because the other case is handled by the `false` specialization below.
   780	template <typename _From, typename _To, bool = (sizeof(_From) < sizeof(_To))>
   781	  struct __converts_to_higher_integer_rank : public true_type {};
   782	
   783	// this may fail for char -> short if sizeof(char) == sizeof(short)
      	// sizeof(_From) >= sizeof(_To): true only if the type of the expression
      	// `_From + _To` is exactly _To.
   784	template <typename _From, typename _To>
   785	  struct __converts_to_higher_integer_rank<_From, _To, false>
   786	  : public is_same<decltype(declval<_From>() + declval<_To>()), _To> {};
   787	
   788	// }}}
   789	// __data(simd/simd_mask) {{{
   790	template <typename _Tp, typename _Ap>
   791	  _GLIBCXX_SIMD_INTRINSIC constexpr const auto&
   792	  __data(const simd<_Tp, _Ap>& __x);
   793	
   794	template <typename _Tp, typename _Ap>
   795	  _GLIBCXX_SIMD_INTRINSIC constexpr auto&
   796	  __data(simd<_Tp, _Ap>& __x);
   797	
   798	template <typename _Tp, typename _Ap>
   799	  _GLIBCXX_SIMD_INTRINSIC constexpr const auto&
   800	  __data(const simd_mask<_Tp, _Ap>& __x);
   801	
   802	template <typename _Tp, typename _Ap>
   803	  _GLIBCXX_SIMD_INTRINSIC constexpr auto&
   804	  __data(simd_mask<_Tp, _Ap>& __x);
   805	
   806	// }}}
   807	// _SimdConverter {{{
      	// Primary template, declared only. Parameters: source value type and ABI,
      	// target value type and ABI, plus one defaulted parameter.
   808	template <typename _FromT, typename _FromA, typename _ToT, typename _ToA,
   809		  typename = void>
   810	  struct _SimdConverter;
   811	
      	// Identity case (source and target type and ABI are the same): operator()
      	// returns a reference to its argument without converting anything.
   812	template <typename _Tp, typename _Ap>
   813	  struct _SimdConverter<_Tp, _Ap, _Tp, _Ap, void>
   814	  {
   815	    template <typename _Up>
   816	      _GLIBCXX_SIMD_INTRINSIC const _Up&
   817	      operator()(const _Up& __x)
   818	      { return __x; }
   819	  };
   820	
   821	// }}}
   822	// __to_value_type_or_member_type {{{
      	// Overload for arguments on which __data(__x) is well-formed: returns
      	// __data(__x). The trailing return type removes this overload by SFINAE for
      	// all other arguments.
   823	template <typename _V>
   824	  _GLIBCXX_SIMD_INTRINSIC constexpr auto
   825	  __to_value_type_or_member_type(const _V& __x) -> decltype(__data(__x))
   826	  { return __data(__x); }
   827	
      	// Overload for a plain element value: returns the argument unchanged by
      	// reference. _V appears only in a non-deduced context here, so it has to be
      	// given explicitly by the caller.
   828	template <typename _V>
   829	  _GLIBCXX_SIMD_INTRINSIC constexpr const typename _V::value_type&
   830	  __to_value_type_or_member_type(const typename _V::value_type& __x)
   831	  { return __x; }
   832	
   833	// }}}
   834	// __bool_storage_member_type{{{
   835	template <size_t _Size>
   836	  struct __bool_storage_member_type;
   837	
   838	template <size_t _Size>
   839	  using __bool_storage_member_type_t =
   840	    typename __bool_storage_member_type<_Size>::type;
   841	
   842	// }}}
   843	// _SimdTuple {{{
   844	// why not tuple?
   845	// 1. tuple gives no guarantee about the storage order, but this
   846	//    implementation requires storage equivalent to
   847	//    array<_Tp, _Np>
   848	// 2. direct access to the element type (first template argument)
   849	// 3. enforces equal element type, only different _Abi types are allowed
   850	template <typename _Tp, typename... _Abis>
   851	  struct _SimdTuple;
   852	
   853	//}}}
   854	// __fixed_size_storage_t {{{
   855	template <typename _Tp, int _Np>
   856	  struct __fixed_size_storage;
   857	
   858	template <typename _Tp, int _Np>
   859	  using __fixed_size_storage_t = typename __fixed_size_storage<_Tp, _Np>::type;
   860	
   861	// }}}
   862	// _SimdWrapper fwd decl{{{
      	// _SimdWrapper<_Tp, _Size>: declared only; _Size is the element count.
   863	template <typename _Tp, size_t _Size, typename = void_t<>>
   864	  struct _SimdWrapper;
   865	
      	// Aliases for wrappers with a total size of 8, 16, 32 or 64 bytes: the
      	// element count is that byte count divided by sizeof(_Tp).
   866	template <typename _Tp>
   867	  using _SimdWrapper8 = _SimdWrapper<_Tp, 8 / sizeof(_Tp)>;
   868	template <typename _Tp>
   869	  using _SimdWrapper16 = _SimdWrapper<_Tp, 16 / sizeof(_Tp)>;
   870	template <typename _Tp>
   871	  using _SimdWrapper32 = _SimdWrapper<_Tp, 32 / sizeof(_Tp)>;
   872	template <typename _Tp>
   873	  using _SimdWrapper64 = _SimdWrapper<_Tp, 64 / sizeof(_Tp)>;
   874	
   875	// }}}
   876	// __is_simd_wrapper {{{
      	// True only for _SimdWrapper<_Tp, _Np> (with its third template argument
      	// left at the default); false for every other type.
   877	template <typename _Tp>
   878	  struct __is_simd_wrapper : false_type {};
   879	
   880	template <typename _Tp, size_t _Np>
   881	  struct __is_simd_wrapper<_SimdWrapper<_Tp, _Np>> : true_type {};
   882	
   883	template <typename _Tp>
   884	  inline constexpr bool __is_simd_wrapper_v = __is_simd_wrapper<_Tp>::value;
   885	
   886	// }}}
   887	// _BitOps {{{
   888	struct _BitOps
   889	{
   890	  // _S_bit_iteration {{{
      	  // Calls __f once per set bit of __mask, passing
      	  // std::__countr_zero of the remaining bits (presumably the index of the
      	  // lowest set bit - confirm against <bit>).
      	  //
      	  // __mask is first copied into an unsigned integer __k: _UInt when
      	  // sizeof(_Tp) <= sizeof(_UInt), otherwise _ULLong. Types that do not
      	  // convert implicitly to that integer must provide to_ullong().
      	  // _Tp must not be larger than _ULLong (static_assert).
   891	  template <typename _Tp, typename _Fp>
   892	    static void
   893	    _S_bit_iteration(_Tp __mask, _Fp&& __f)
   894	    {
   895	      static_assert(sizeof(_ULLong) >= sizeof(_Tp));
   896	      conditional_t<sizeof(_Tp) <= sizeof(_UInt), _UInt, _ULLong> __k;
   897	      if constexpr (is_convertible_v<_Tp, decltype(__k)>)
   898		__k = __mask;
   899	      else
   900		__k = __mask.to_ullong();
   901	      while(__k)
   902		{
   903		  __f(std::__countr_zero(__k));
      		  // clears the lowest set bit of __k
   904		  __k &= (__k - 1);
   905		}
   906	    }
   907	
   908	  //}}}
   909	};
   910	
   911	//}}}
   912	// __increment, __decrement {{{
      	// Function object returning the pre-incremented copy of its argument. The
      	// operand is taken by value, so the caller's object is left untouched.
   913	template <typename _Tp = void>
   914	  struct __increment
   915	  { constexpr _Tp operator()(_Tp __a) const { return ++__a; } };
   916	
      	// __increment<void> (the default): the operand type is deduced per call.
   917	template <>
   918	  struct __increment<void>
   919	  {
   920	    template <typename _Tp>
   921	      constexpr _Tp
   922	      operator()(_Tp __a) const
   923	      { return ++__a; }
   924	  };
   925	
      	// Function object returning the pre-decremented copy of its argument. The
      	// operand is taken by value, so the caller's object is left untouched.
   926	template <typename _Tp = void>
   927	  struct __decrement
   928	  { constexpr _Tp operator()(_Tp __a) const { return --__a; } };
   929	
      	// __decrement<void> (the default): the operand type is deduced per call.
   930	template <>
   931	  struct __decrement<void>
   932	  {
   933	    template <typename _Tp>
   934	      constexpr _Tp
   935	      operator()(_Tp __a) const
   936	      { return --__a; }
   937	  };
   938	
   939	// }}}
   940	// _ValuePreserving(OrInt) {{{
      	// Alias for _From that is only well-formed (SFINAE) when
      	// __is_narrowing_conversion reports the conversion from
      	// __remove_cvref_t<_From> to _To as non-narrowing.
   941	template <typename _From, typename _To,
   942		  typename = enable_if_t<negation<
   943		    __is_narrowing_conversion<__remove_cvref_t<_From>, _To>>::value>>
   944	  using _ValuePreserving = _From;
   945	
      	// Alias for _From that is only well-formed when _From is convertible to _To
      	// and at least one of the following holds for _DecayedFrom:
      	//  - it is _To itself,
      	//  - it is int,
      	//  - it is _UInt and _To is unsigned,
      	//  - converting it to _To is not narrowing.
   946	template <typename _From, typename _To,
   947		  typename _DecayedFrom = __remove_cvref_t<_From>,
   948		  typename = enable_if_t<conjunction<
   949		    is_convertible<_From, _To>,
   950		    disjunction<
   951		      is_same<_DecayedFrom, _To>, is_same<_DecayedFrom, int>,
   952		      conjunction<is_same<_DecayedFrom, _UInt>, is_unsigned<_To>>,
   953		      negation<__is_narrowing_conversion<_DecayedFrom, _To>>>>::value>>
   954	  using _ValuePreservingOrInt = _From;
   955	
   956	// }}}
   957	// __intrinsic_type {{{
      	// Declared only; parameterised on the element type and a size in bytes.
   958	template <typename _Tp, size_t _Bytes, typename = void_t<>>
   959	  struct __intrinsic_type;
   960	
      	// Takes an element count (_Size) and converts it to bytes for the lookup.
   961	template <typename _Tp, size_t _Size>
   962	  using __intrinsic_type_t =
   963	    typename __intrinsic_type<_Tp, _Size * sizeof(_Tp)>::type;
   964	
      	// Fixed-width shorthands: __intrinsic_typeN_t<_Tp> is the N-byte variant.
   965	template <typename _Tp>
   966	  using __intrinsic_type2_t = typename __intrinsic_type<_Tp, 2>::type;
   967	template <typename _Tp>
   968	  using __intrinsic_type4_t = typename __intrinsic_type<_Tp, 4>::type;
   969	template <typename _Tp>
   970	  using __intrinsic_type8_t = typename __intrinsic_type<_Tp, 8>::type;
   971	template <typename _Tp>
   972	  using __intrinsic_type16_t = typename __intrinsic_type<_Tp, 16>::type;
   973	template <typename _Tp>
   974	  using __intrinsic_type32_t = typename __intrinsic_type<_Tp, 32>::type;
   975	template <typename _Tp>
   976	  using __intrinsic_type64_t = typename __intrinsic_type<_Tp, 64>::type;
   977	
   978	// }}}
   979	// _BitMask {{{
      	// Forward declaration: _Np bits; _Sanitized defaults to false.
   980	template <size_t _Np, bool _Sanitized = false>
   981	  struct _BitMask;
   982	
      	// Marks every _BitMask specialization as a bitmask for __is_bitmask (whose
      	// primary template is declared outside this section).
   983	template <size_t _Np, bool _Sanitized>
   984	  struct __is_bitmask<_BitMask<_Np, _Sanitized>, void> : true_type {};
   985	
      	// A _BitMask with the _Sanitized flag set.
   986	template <size_t _Np>
   987	  using _SanitizedBitMask = _BitMask<_Np, true>;
   988	
   989	template <size_t _Np, bool _Sanitized>
   990	  struct _BitMask
   991	  {
   992	    static_assert(_Np > 0);
   993	
      	    // Bytes needed to hold _Np bits, rounded up.
   994	    static constexpr size_t _NBytes = __div_roundup(_Np, __CHAR_BIT__);
   995	
      	    // Storage word: bool for a single bit, otherwise the unsigned integer
      	    // whose size is min(sizeof(_ULLong), bit_ceil(_NBytes)).
   996	    using _Tp = conditional_t<_Np == 1, bool,
   997				      make_unsigned_t<__int_with_sizeof_t<std::min(
   998					sizeof(_ULLong), std::__bit_ceil(_NBytes))>>>;
   999	
      	    // Number of _Tp words needed to cover _NBytes.
  1000	    static constexpr int _S_array_size = __div_roundup(_NBytes, sizeof(_Tp));
  1001	
  1002	    _Tp _M_bits[_S_array_size];
  1003	
      	    // Bits of the storage that lie beyond the _Np mask bits (0 when _Np == 1).
  1004	    static constexpr int _S_unused_bits
  1005	      = _Np == 1 ? 0 : _S_array_size * sizeof(_Tp) * __CHAR_BIT__ - _Np;
  1006	
      	    // All-ones word shifted right by _S_unused_bits, i.e. the valid bits of
      	    // the last storage word. The unary + applies integral promotion before
      	    // the shift.
  1007	    static constexpr _Tp _S_bitmask = +_Tp(~_Tp()) >> _S_unused_bits;
  1008	
  1009	    constexpr _BitMask() noexcept = default;
  1010	
      	    // Stores __x, converted to _Tp, in the first storage word. Only one
      	    // initializer is supplied for _M_bits.
  1011	    constexpr _BitMask(unsigned long long __x) noexcept
  1012	      : _M_bits{static_cast<_Tp>(__x)} {}
  1013	
      	    // Delegates to the unsigned long long constructor via to_ullong().
  1014	    _BitMask(bitset<_Np> __x) noexcept : _BitMask(__x.to_ullong()) {}
  1015	
  1016	    constexpr _BitMask(const _BitMask&) noexcept = default;
  1017	
      	    // Converting constructor, enabled only for unsanitized -> sanitized with
      	    // the same _Np; the source is sanitized before it is copied.
  1018	    template <bool _RhsSanitized, typename = enable_if_t<_RhsSanitized == false
  1019								 && _Sanitized == true>>
  1020	      constexpr _BitMask(const _BitMask<_Np, _RhsSanitized>& __rhs) noexcept
  1021		: _BitMask(__rhs._M_sanitized()) {}
  1022	
      	    // All conversions below require the mask to fit a single storage word
      	    // (static_assert) and return that word, converted to the target type.
  1023	    constexpr operator _SimdWrapper<bool, _Np>() const noexcept
  1024	    {
  1025	      static_assert(_S_array_size == 1);
  1026	      return _M_bits[0];
  1027	    }
  1028	
  1029	    // precondition: is sanitized
  1030	    constexpr _Tp
  1031	    _M_to_bits() const noexcept
  1032	    {
  1033	      static_assert(_S_array_size == 1);
  1034	      return _M_bits[0];
  1035	    }
  1036	
  1037	    // precondition: is sanitized
  1038	    constexpr unsigned long long
  1039	    to_ullong() const noexcept
  1040	    {
  1041	      static_assert(_S_array_size == 1);
  1042	      return _M_bits[0];
  1043	    }
  1044	
  1045	    // precondition: is sanitized
  1046	    constexpr unsigned long
  1047	    to_ulong() const noexcept
  1048	    {
  1049	      static_assert(_S_array_size == 1);
  1050	      return _M_bits[0];
  1051	    }
  1052	
      	    // Builds a std::bitset<_Np> from the single storage word.
  1053	    constexpr bitset<_Np>
  1054	    _M_to_bitset() const noexcept
  1055	    {
  1056	      static_assert(_S_array_size == 1);
  1057	      return _M_bits[0];
  1058	    }
  1059	
      	    // Returns this mask in sanitized form. If it is already sanitized, *this
      	    // is returned as is (decltype(auto) keeps the reference). Otherwise a new
      	    // _SanitizedBitMask is built: every word is copied and, when unused bits
      	    // exist, the last word is ANDed with _S_bitmask to clear them.
  1060	    constexpr decltype(auto)
  1061	    _M_sanitized() const noexcept
  1062	    {
  1063	      if constexpr (_Sanitized)
  1064		return *this;
  1065	      else if constexpr (_Np == 1)
  1066		return _SanitizedBitMask<_Np>(_M_bits[0]);
  1067	      else
  1068		{
  1069		  _SanitizedBitMask<_Np> __r = {};
  1070		  for (int __i = 0; __i < _S_array_size; ++__i)
  1071		    __r._M_bits[__i] = _M_bits[__i];
  1072		  if constexpr (_S_unused_bits > 0)
  1073		    __r._M_bits[_S_array_size - 1] &= _S_bitmask;
  1074		  return __r;
  1075		}
  1076	    }
  1077	
      	    // Returns a mask of _Np + _Mp bits: the (sanitized) bits of __lsb occupy
      	    // the low _Mp positions and this mask's first word is shifted left by _Mp
      	    // above them. Only implemented when the result fits one storage word;
      	    // any other instantiation ends in __assert_unreachable.
  1078	    template <size_t _Mp, bool _LSanitized>
  1079	      constexpr _BitMask<_Np + _Mp, _Sanitized>
  1080	      _M_prepend(_BitMask<_Mp, _LSanitized> __lsb) const noexcept
  1081	      {
  1082		constexpr size_t _RN = _Np + _Mp;
  1083		using _Rp = _BitMask<_RN, _Sanitized>;
  1084		if constexpr (_Rp::_S_array_size == 1)
  1085		  {
  1086		    _Rp __r{{_M_bits[0]}};
  1087		    __r._M_bits[0] <<= _Mp;
  1088		    __r._M_bits[0] |= __lsb._M_sanitized()._M_bits[0];
  1089		    return __r;
  1090		  }
  1091		else
  1092		  __assert_unreachable<_Rp>();
  1093	      }
  1094	
  1095	    // Return a new _BitMask with size _NewSize while dropping _DropLsb least
  1096	    // significant bits. If the operation implicitly produces a sanitized bitmask,
  1097	    // the result type will have _Sanitized set.
      	    // Only the first storage word (_M_bits[0]) is read. The second template
      	    // argument of the returned _BitMask is the compile-time condition under
      	    // which the shifted word is treated as sanitized.
  1098	    template <size_t _DropLsb, size_t _NewSize = _Np - _DropLsb>
  1099	      constexpr auto
  1100	      _M_extract() const noexcept
  1101	      {
  1102		static_assert(_Np > _DropLsb);
  1103		static_assert(_DropLsb + _NewSize <= sizeof(_ULLong) * __CHAR_BIT__,
  1104			      "not implemented for bitmasks larger than one ullong");
  1105		if constexpr (_NewSize == 1)
  1106		  // must sanitize because the return _Tp is bool
  1107		  return _SanitizedBitMask<1>(_M_bits[0] & (_Tp(1) << _DropLsb));
  1108		else
  1109		  return _BitMask<_NewSize,
  1110				  ((_NewSize + _DropLsb == sizeof(_Tp) * __CHAR_BIT__
  1111				    && _NewSize + _DropLsb <= _Np)
  1112				   || ((_Sanitized || _Np == sizeof(_Tp) * __CHAR_BIT__)
  1113				       && _NewSize + _DropLsb >= _Np))>(_M_bits[0]
  1114									>> _DropLsb);
  1115	      }
  1116	
  1117	    // True if all bits are set. Implicitly sanitizes if _Sanitized == false.
  1118	    constexpr bool
  1119	    all() const noexcept
  1120	    {
  1121	      if constexpr (_Np == 1)
  1122		return _M_bits[0];
  1123	      else if constexpr (!_Sanitized)
  1124		return _M_sanitized().all();
  1125	      else
  1126		{
      		  // Every word but the last must be all-ones; the last word is
      		  // compared against _S_bitmask because its unused bits are zero.
  1127		  constexpr _Tp __allbits = ~_Tp();
  1128		  for (int __i = 0; __i < _S_array_size - 1; ++__i)
  1129		    if (_M_bits[__i] != __allbits)
  1130		      return false;
  1131		  return _M_bits[_S_array_size - 1] == _S_bitmask;
  1132		}
  1133	    }
  1134	
  1135	    // Reports whether at least one bit is set. An unsanitized mask is sanitized
  1136	    // first, so bits beyond _Np cannot contribute.
  1137	    constexpr bool
  1138	    any() const noexcept
  1139	    {
  1140	      if constexpr (_Np == 1)
  1141		return _M_bits[0];
  1142	      else if constexpr (!_Sanitized)
  1143		return _M_sanitized().any();
  1144	      else
  1145		{
  1146		  for (const _Tp& __word : _M_bits)
  1147		    if (__word != 0)
  1148		      return true;
  1149		  return false;
  1150		}
  1151	    }
  1152	
  1153	    // Reports whether every bit is clear; sanitizes first when !_Sanitized.
  1154	    constexpr bool
  1155	    none() const noexcept
  1156	    {
  1157	      if constexpr (_Np == 1)
  1158		return !_M_bits[0];
  1159	      else if constexpr (!_Sanitized)
  1160		return _M_sanitized().none();
  1161	      else
  1162		{
  1163		  for (const _Tp& __word : _M_bits)
  1164		    if (__word != 0)
  1165		      return false;
  1166		  return true;
  1167		}
  1168	    }
  1169	
  1170	    // Returns the number of set bits. Implicitly sanitizes if _Sanitized ==
  1171	    // false.
  1172	    constexpr int
  1173	    count() const noexcept
  1174	    {
  1175	      if constexpr (_Np == 1)
  1176		return _M_bits[0];
  1177	      else if constexpr (!_Sanitized)
      		// Delegate to count() of the sanitized mask (not none(), which would
      		// yield 0 or 1 instead of the population count).
  1178		return _M_sanitized().count();
  1179	      else
  1180		{
      		  // Sum the population count of every storage word.
  1181		  int __result = __builtin_popcountll(_M_bits[0]);
  1182		  for (int __i = 1; __i < _S_array_size; ++__i)
  1183		    __result += __builtin_popcountll(_M_bits[__i]);
  1184		  return __result;
  1185		}
  1186	    }
  1187	
  1188	    // Returns the bit at offset __i as bool.
      	    // The run-time index is not range-checked here.
  1189	    constexpr bool
  1190	    operator[](size_t __i) const noexcept
  1191	    {
  1192	      if constexpr (_Np == 1)
  1193		return _M_bits[0];
  1194	      else if constexpr (_S_array_size == 1)
  1195		return (_M_bits[0] >> __i) & 1;
  1196	      else
  1197		{
      		  // __j selects the storage word, __shift the bit inside it.
  1198		  const size_t __j = __i / (sizeof(_Tp) * __CHAR_BIT__);
  1199		  const size_t __shift = __i % (sizeof(_Tp) * __CHAR_BIT__);
  1200		  return (_M_bits[__j] >> __shift) & 1;
  1201		}
  1202	    }
  1203	
      	    // Compile-time index variant; __i < _Np is enforced by static_assert.
  1204	    template <size_t __i>
  1205	      constexpr bool
  1206	      operator[](_SizeConstant<__i>) const noexcept
  1207	      {
  1208		static_assert(__i < _Np);
  1209		constexpr size_t __j = __i / (sizeof(_Tp) * __CHAR_BIT__);
  1210		constexpr size_t __shift = __i % (sizeof(_Tp) * __CHAR_BIT__);
  1211		return static_cast<bool>(_M_bits[__j] & (_Tp(1) << __shift));
  1212	      }
  1213	
  1214	    // Set the bit at offset __i to __x.
      	    // The target bit is cleared first and then ORed with __x shifted into
      	    // place. The run-time index is not range-checked here.
  1215	    constexpr void
  1216	    set(size_t __i, bool __x) noexcept
  1217	    {
  1218	      if constexpr (_Np == 1)
  1219		_M_bits[0] = __x;
  1220	      else if constexpr (_S_array_size == 1)
  1221		{
  1222		  _M_bits[0] &= ~_Tp(_Tp(1) << __i);
  1223		  _M_bits[0] |= _Tp(_Tp(__x) << __i);
  1224		}
  1225	      else
  1226		{
      		  // __j selects the storage word, __shift the bit inside it.
  1227		  const size_t __j = __i / (sizeof(_Tp) * __CHAR_BIT__);
  1228		  const size_t __shift = __i % (sizeof(_Tp) * __CHAR_BIT__);
  1229		  _M_bits[__j] &= ~_Tp(_Tp(1) << __shift);
  1230		  _M_bits[__j] |= _Tp(_Tp(__x) << __shift);
  1231		}
  1232	    }
  1233	
      	    // Compile-time index variant; __i < _Np is enforced by static_assert and
      	    // the clearing mask is a constant expression.
  1234	    template <size_t __i>
  1235	      constexpr void
  1236	      set(_SizeConstant<__i>, bool __x) noexcept
  1237	      {
  1238		static_assert(__i < _Np);
  1239		if constexpr (_Np == 1)
  1240		  _M_bits[0] = __x;
  1241		else
  1242		  {
  1243		    constexpr size_t __j = __i / (sizeof(_Tp) * __CHAR_BIT__);
  1244		    constexpr size_t __shift = __i % (sizeof(_Tp) * __CHAR_BIT__);
  1245		    constexpr _Tp __mask = ~_Tp(_Tp(1) << __shift);
  1246		    _M_bits[__j] &= __mask;
  1247		    _M_bits[__j] |= _Tp(_Tp(__x) << __shift);
  1248		  }
  1249	      }
  1250	
  1251	    // Inverts all bits. Sanitized input leads to sanitized output.
  1252	    constexpr _BitMask
  1253	    operator~() const noexcept
  1254	    {
  1255	      if constexpr (_Np == 1)
  1256		return !_M_bits[0];
  1257	      else
  1258		{
  1259		  _BitMask __result{};
  1260		  for (int __i = 0; __i < _S_array_size - 1; ++__i)
  1261		    __result._M_bits[__i] = ~_M_bits[__i];
      		  // Last word: XOR with _S_bitmask flips only the valid bits, which
      		  // keeps the unused bits of a sanitized mask at zero; an unsanitized
      		  // mask is simply complemented.
  1262		  if constexpr (_Sanitized)
  1263		    __result._M_bits[_S_array_size - 1]
  1264		      = _M_bits[_S_array_size - 1] ^ _S_bitmask;
  1265		  else
  1266		    __result._M_bits[_S_array_size - 1] = ~_M_bits[_S_array_size - 1];
  1267		  return __result;
  1268		}
  1269	    }
  1270	
      	    // Compound assignments apply the operation word by word across all
      	    // _S_array_size storage words (via __execute_n_times) and return *this.
      	    // They are &-qualified, so they can only be called on lvalues.
  1271	    constexpr _BitMask&
  1272	    operator^=(const _BitMask& __b) & noexcept
  1273	    {
  1274	      __execute_n_times<_S_array_size>(
  1275		[&](auto __i) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA { _M_bits[__i] ^= __b._M_bits[__i]; });
  1276	      return *this;
  1277	    }
  1278	
  1279	    constexpr _BitMask&
  1280	    operator|=(const _BitMask& __b) & noexcept
  1281	    {
  1282	      __execute_n_times<_S_array_size>(
  1283		[&](auto __i) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA { _M_bits[__i] |= __b._M_bits[__i]; });
  1284	      return *this;
  1285	    }
  1286	
  1287	    constexpr _BitMask&
  1288	    operator&=(const _BitMask& __b) & noexcept
  1289	    {
  1290	      __execute_n_times<_S_array_size>(
  1291		[&](auto __i) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA { _M_bits[__i] &= __b._M_bits[__i]; });
  1292	      return *this;
  1293	    }
  1294	
  1295	    // Bitwise XOR of two masks of identical type; implemented via operator^=.
  1296	    friend constexpr _BitMask
  1297	    operator^(const _BitMask& __a, const _BitMask& __b) noexcept
  1298	    {
  1299	      _BitMask __result(__a);
  1300	      return __result ^= __b;
  1301	    }
  1302	
  1303	    // Bitwise OR of two masks of identical type; implemented via operator|=.
  1304	    friend constexpr _BitMask
  1305	    operator|(const _BitMask& __a, const _BitMask& __b) noexcept
  1306	    {
  1307	      _BitMask __result(__a);
  1308	      return __result |= __b;
  1309	    }
  1310	
  1311	    // Bitwise AND of two masks of identical type; implemented via operator&=.
  1312	    friend constexpr _BitMask
  1313	    operator&(const _BitMask& __a, const _BitMask& __b) noexcept
  1314	    {
  1315	      _BitMask __result(__a);
  1316	      return __result &= __b;
  1317	    }
  1318	
  1319	    _GLIBCXX_SIMD_INTRINSIC
  1320	    constexpr bool
  1321	    _M_is_constprop() const
  1322	    {
  1323	      if constexpr (_S_array_size == 0)
  1324		return __builtin_constant_p(_M_bits[0]);
  1325	      else
  1326		{
  1327		  for (int __i = 0; __i < _S_array_size; ++__i)
  1328		    if (!__builtin_constant_p(_M_bits[__i]))
  1329		      return false;
  1330		  return true;
  1331		}
  1332	    }
  1333	  };
  1334	
  1335	// }}}
  1336	
  1337	// vvv ---- builtin vector types [[gnu::vector_size(N)]] and operations ---- vvv
  1338	// __min_vector_size {{{
      	// Smallest builtin-vector size in bytes used for element type _Tp: two
      	// elements' worth by default.
  1339	template <typename _Tp = void>
  1340	  static inline constexpr int __min_vector_size = 2 * sizeof(_Tp);
  1341	
      	// The <void> specialization is a type-independent value: 8 bytes when
      	// _GLIBCXX_SIMD_HAVE_NEON is set, 16 bytes otherwise.
  1342	#if _GLIBCXX_SIMD_HAVE_NEON
  1343	template <>
  1344	  inline constexpr int __min_vector_size<void> = 8;
  1345	#else
  1346	template <>
  1347	  inline constexpr int __min_vector_size<void> = 16;
  1348	#endif
  1349	
  1350	// }}}
  1351	// __vector_type {{{
  1352	template <typename _Tp, size_t _Np, typename = void>
  1353	  struct __vector_type_n {};
  1354	
  1355	// substitution failure for 0-element case
  1356	template <typename _Tp>
  1357	  struct __vector_type_n<_Tp, 0, void> {};
  1358	
  1359	// special case 1-element to be _Tp itself
  1360	template <typename _Tp>
  1361	  struct __vector_type_n<_Tp, 1, enable_if_t<__is_vectorizable_v<_Tp>>>
  1362	  { using type = _Tp; };
  1363	
  1364	// else, use GNU-style builtin vector types
  1365	template <typename _Tp, size_t _Np>
  1366	  struct __vector_type_n<_Tp, _Np, enable_if_t<__is_vectorizable_v<_Tp> && _Np >= 2>>
  1367	  {
      	    // Payload size in bytes, rounded up to a power of two.
  1368	    static constexpr size_t _S_Np2 = std::__bit_ceil(_Np * sizeof(_Tp));
  1369	
      	    // Actual vector size: never below __min_vector_size<_Tp>, and on i386
      	    // an 8-byte payload is widened to 16 bytes (see the comment below).
  1370	    static constexpr size_t _S_Bytes =
  1371	#ifdef __i386__
  1372	      // Using [[gnu::vector_size(8)]] would wreak havoc on the FPU because
  1373	      // those objects are passed via MMX registers and nothing ever calls EMMS.
  1374	      _S_Np2 == 8 ? 16 :
  1375	#endif
  1376	      _S_Np2 < __min_vector_size<_Tp> ? __min_vector_size<_Tp>
  1377					      : _S_Np2;
  1378	
  1379	    using type [[__gnu__::__vector_size__(_S_Bytes)]] = _Tp;
  1380	  };
  1381	
      	// Byte-size based lookup. The third parameter is _Bytes % sizeof(_Tp); only
      	// the remainder-0 case is defined, so sizes that are not a multiple of
      	// sizeof(_Tp) have no definition in this section.
  1382	template <typename _Tp, size_t _Bytes, size_t = _Bytes % sizeof(_Tp)>
  1383	  struct __vector_type;
  1384	
  1385	template <typename _Tp, size_t _Bytes>
  1386	  struct __vector_type<_Tp, _Bytes, 0>
  1387	  : __vector_type_n<_Tp, _Bytes / sizeof(_Tp)> {};
  1388	
      	// Element-count based shorthand; note it names __vector_type_n directly.
  1389	template <typename _Tp, size_t _Size>
  1390	  using __vector_type_t = typename __vector_type_n<_Tp, _Size>::type;
  1391	
      	// Fixed-width shorthands: __vector_typeN_t<_Tp> is the N-byte variant.
  1392	template <typename _Tp>
  1393	  using __vector_type2_t = typename __vector_type<_Tp, 2>::type;
  1394	template <typename _Tp>
  1395	  using __vector_type4_t = typename __vector_type<_Tp, 4>::type;
  1396	template <typename _Tp>
  1397	  using __vector_type8_t = typename __vector_type<_Tp, 8>::type;
  1398	template <typename _Tp>
  1399	  using __vector_type16_t = typename __vector_type<_Tp, 16>::type;
  1400	template <typename _Tp>
  1401	  using __vector_type32_t = typename __vector_type<_Tp, 32>::type;
  1402	template <typename _Tp>
  1403	  using __vector_type64_t = typename __vector_type<_Tp, 64>::type;
  1404	
  1405	// }}}
  1406	// __is_vector_type {{{
      	// Trait: true when _Tp is exactly the type that __vector_type yields for
      	// _Tp's own element type (the type of _Tp()[0], reference removed) and
      	// sizeof(_Tp). Types that cannot be subscripted, or for which that
      	// __vector_type has no `type`, fall back to false_type.
  1407	template <typename _Tp, typename = void_t<>>
  1408	  struct __is_vector_type : false_type {};
  1409	
  1410	template <typename _Tp>
  1411	  struct __is_vector_type<
  1412	    _Tp, void_t<typename __vector_type<
  1413		   remove_reference_t<decltype(declval<_Tp>()[0])>, sizeof(_Tp)>::type>>
  1414	    : is_same<_Tp, typename __vector_type<
  1415			     remove_reference_t<decltype(declval<_Tp>()[0])>,
  1416			     sizeof(_Tp)>::type> {};
  1417	
  1418	template <typename _Tp>
  1419	  inline constexpr bool __is_vector_type_v = __is_vector_type<_Tp>::value;
  1420	
  1421	// }}}
  1422	// __is_intrinsic_type {{{
      	// With _GLIBCXX_SIMD_HAVE_SSE_ABI the trait is an alias of
      	// __is_vector_type. Otherwise it performs the same "is _Tp exactly the
      	// looked-up type" test, but against __intrinsic_type.
  1423	#if _GLIBCXX_SIMD_HAVE_SSE_ABI
  1424	template <typename _Tp>
  1425	  using __is_intrinsic_type = __is_vector_type<_Tp>;
  1426	#else // not SSE (x86)
  1427	template <typename _Tp, typename = void_t<>>
  1428	  struct __is_intrinsic_type : false_type {};
  1429	
  1430	template <typename _Tp>
  1431	  struct __is_intrinsic_type<
  1432	    _Tp, void_t<typename __intrinsic_type<
  1433		   remove_reference_t<decltype(declval<_Tp>()[0])>, sizeof(_Tp)>::type>>
  1434	    : is_same<_Tp, typename __intrinsic_type<
  1435			     remove_reference_t<decltype(declval<_Tp>()[0])>,
  1436			     sizeof(_Tp)>::type> {};
  1437	#endif
  1438	
  1439	template <typename _Tp>
  1440	  inline constexpr bool __is_intrinsic_type_v = __is_intrinsic_type<_Tp>::value;
  1441	
  1442	// }}}
  1443	// _VectorTraits{{{
      	// Primary template is declared only; specializations supply the traits.
  1444	template <typename _Tp, typename = void_t<>>
  1445	  struct _VectorTraitsImpl;
  1446	
      	// Traits for builtin vector / intrinsic types.
  1447	template <typename _Tp>
  1448	  struct _VectorTraitsImpl<_Tp, enable_if_t<__is_vector_type_v<_Tp>
  1449						      || __is_intrinsic_type_v<_Tp>>>
  1450	  {
  1451	    using type = _Tp;
      	    // Element type, taken from the result of subscripting _Tp.
  1452	    using value_type = remove_reference_t<decltype(declval<_Tp>()[0])>;
      	    // Number of value_type elements that fit into _Tp.
  1453	    static constexpr int _S_full_size = sizeof(_Tp) / sizeof(value_type);
  1454	    using _Wrapper = _SimdWrapper<value_type, _S_full_size>;
      	    // True when the element type is _Up and the element count is _W.
  1455	    template <typename _Up, int _W = _S_full_size>
  1456	      static constexpr bool _S_is
  1457		= is_same_v<value_type, _Up> && _W == _S_full_size;
  1458	  };
  1459	
  1460	template <typename _Tp, size_t _Np>
  1461	  struct _VectorTraitsImpl<_SimdWrapper<_Tp, _Np>,
  1462				   void_t<__vector_type_t<_Tp, _Np>>>
  1463	  {
  1464	    using type = __vector_type_t<_Tp, _Np>;
  1465	    using value_type = _Tp;
  1466	    static constexpr int _S_full_size = sizeof(type) / sizeof(value_type);
  1467	    using _Wrapper = _SimdWrapper<_Tp, _Np>;
  1468	    static constexpr bool _S_is_partial = (_Np == _S_full_size);
  1469	    static constexpr int _S_partial_width = _Np;
  1470	    template <typename _Up, int _W = _S_full_size>
  1471	      static constexpr bool _S_is
  1472		= is_same_v<value_type, _Up>&& _W == _S_full_size;
  1473	  };
  1474	
      	// Alias for _VectorTraitsImpl<_Tp>; the defaulted second parameter requires
      	// _VectorTraitsImpl<_Tp>::type to exist, otherwise substitution fails.
  1475	template <typename _Tp, typename = typename _VectorTraitsImpl<_Tp>::type>
  1476	  using _VectorTraits = _VectorTraitsImpl<_Tp>;
  1477	
  1478	// }}}
  1479	// __as_vector{{{
      	// Returns the builtin vector behind __x:
      	//  - a builtin vector is returned unchanged,
      	//  - simd / simd_mask objects yield __data(__x)._M_data,
      	//  - a vectorizable scalar becomes the first element of a 2-element vector,
      	//  - anything else is expected to expose an _M_data member.
  1480	template <typename _V>
  1481	  _GLIBCXX_SIMD_INTRINSIC constexpr auto
  1482	  __as_vector(_V __x)
  1483	  {
  1484	    if constexpr (__is_vector_type_v<_V>)
  1485	      return __x;
  1486	    else if constexpr (is_simd<_V>::value || is_simd_mask<_V>::value)
  1487	      return __data(__x)._M_data;
  1488	    else if constexpr (__is_vectorizable_v<_V>)
  1489	      return __vector_type_t<_V, 2>{__x};
  1490	    else
  1491	      return __x._M_data;
  1492	  }
  1493	
  1494	// }}}
  1495	// __as_wrapper{{{
      	// Returns __x as a _SimdWrapper-like object:
      	//  - a builtin vector is wrapped; _Np == 0 (the default) selects the
      	//    vector's full element count,
      	//  - simd / simd_mask objects yield __data(__x) and must have size() == _Np,
      	//  - anything else is returned unchanged and must have _S_size == _Np.
      	// Consequently the last two cases require an explicit _Np.
  1496	template <size_t _Np = 0, typename _V>
  1497	  _GLIBCXX_SIMD_INTRINSIC constexpr auto
  1498	  __as_wrapper(_V __x)
  1499	  {
  1500	    if constexpr (__is_vector_type_v<_V>)
  1501	      return _SimdWrapper<typename _VectorTraits<_V>::value_type,
  1502				  (_Np > 0 ? _Np : _VectorTraits<_V>::_S_full_size)>(__x);
  1503	    else if constexpr (is_simd<_V>::value || is_simd_mask<_V>::value)
  1504	      {
  1505		static_assert(_V::size() == _Np);
  1506		return __data(__x);
  1507	      }
  1508	    else
  1509	      {
  1510		static_assert(_V::_S_size == _Np);
  1511		return __x;
  1512	      }
  1513	  }
  1514	
  1515	// }}}
  1516	// __intrin_bitcast{{{
      	// Reinterprets the bits of __v as _To; both types must be builtin vector
      	// or intrinsic types (static_assert). The sizes may differ:
      	//  - equal sizes: plain reinterpret_cast,
      	//  - _To smaller: the leading sizeof(_To) bytes of __v are used,
      	//  - _To larger: __v provides the leading bytes of the result.
  1517	template <typename _To, typename _From>
  1518	  _GLIBCXX_SIMD_INTRINSIC constexpr _To
  1519	  __intrin_bitcast(_From __v)
  1520	  {
  1521	    static_assert((__is_vector_type_v<_From> || __is_intrinsic_type_v<_From>)
  1522			    && (__is_vector_type_v<_To> || __is_intrinsic_type_v<_To>));
  1523	    if constexpr (sizeof(_To) == sizeof(_From))
  1524	      return reinterpret_cast<_To>(__v);
  1525	    else if constexpr (sizeof(_From) > sizeof(_To))
      	      // Shrinking: targets of 16 bytes or more are read through a
      	      // may-alias reference, smaller ones are copied with memcpy.
  1526	      if constexpr (sizeof(_To) >= 16)
  1527		return reinterpret_cast<const __may_alias<_To>&>(__v);
  1528	      else
  1529		{
  1530		  _To __r;
  1531		  __builtin_memcpy(&__r, &__v, sizeof(_To));
  1532		  return __r;
  1533		}
  1534	#if _GLIBCXX_SIMD_X86INTRIN && !defined __clang__
      	    // Widening 16->32, 16->64 and 32->64 bytes through x86 builtins (GCC
      	    // only), selected by __have_avx / __have_avx512f.
  1535	    else if constexpr (__have_avx && sizeof(_From) == 16 && sizeof(_To) == 32)
  1536	      return reinterpret_cast<_To>(__builtin_ia32_ps256_ps(
  1537		reinterpret_cast<__vector_type_t<float, 4>>(__v)));
  1538	    else if constexpr (__have_avx512f && sizeof(_From) == 16
  1539			       && sizeof(_To) == 64)
  1540	      return reinterpret_cast<_To>(__builtin_ia32_ps512_ps(
  1541		reinterpret_cast<__vector_type_t<float, 4>>(__v)));
  1542	    else if constexpr (__have_avx512f && sizeof(_From) == 32
  1543			       && sizeof(_To) == 64)
  1544	      return reinterpret_cast<_To>(__builtin_ia32_ps512_256ps(
  1545		reinterpret_cast<__vector_type_t<float, 8>>(__v)));
  1546	#endif // _GLIBCXX_SIMD_X86INTRIN
      	    // Sources of at most 8 bytes: __v is reinterpreted as one integer of the
      	    // same size and becomes the first element of a wider integer vector.
  1547	    else if constexpr (sizeof(__v) <= 8)
  1548	      return reinterpret_cast<_To>(
  1549		__vector_type_t<__int_for_sizeof_t<_From>, sizeof(_To) / sizeof(_From)>{
  1550		  reinterpret_cast<__int_for_sizeof_t<_From>>(__v)});
  1551	    else
  1552	      {
      		// Generic widening: value-initialized result, then the source bytes
      		// are copied over its beginning.
  1553		static_assert(sizeof(_To) > sizeof(_From));
  1554		_To __r = {};
  1555		__builtin_memcpy(&__r, &__v, sizeof(_From));
  1556		return __r;
  1557	      }
  1558	  }
  1559	
  1560	// }}}
  1561	// __vector_bitcast{{{
      	// Reinterprets __x as a builtin vector of _To. The element count is _NN
      	// when given, otherwise sizeof(_From) / sizeof(_To). The _FromVT default
      	// restricts _From to types for which _VectorTraits is available.
  1562	template <typename _To, size_t _NN = 0, typename _From,
  1563		  typename _FromVT = _VectorTraits<_From>,
  1564		  size_t _Np = _NN == 0 ? sizeof(_From) / sizeof(_To) : _NN>
  1565	  _GLIBCXX_SIMD_INTRINSIC constexpr __vector_type_t<_To, _Np>
  1566	  __vector_bitcast(_From __x)
  1567	  {
  1568	    using _R = __vector_type_t<_To, _Np>;
  1569	    return __intrin_bitcast<_R>(__x);
  1570	  }
  1571	
      	// Overload for _SimdWrapper arguments: operates on __x._M_data and requires
      	// more than one result element.
  1572	template <typename _To, size_t _NN = 0, typename _Tp, size_t _Nx,
  1573		  size_t _Np
  1574		  = _NN == 0 ? sizeof(_SimdWrapper<_Tp, _Nx>) / sizeof(_To) : _NN>
  1575	  _GLIBCXX_SIMD_INTRINSIC constexpr __vector_type_t<_To, _Np>
  1576	  __vector_bitcast(const _SimdWrapper<_Tp, _Nx>& __x)
  1577	  {
  1578	    static_assert(_Np > 1);
  1579	    return __intrin_bitcast<__vector_type_t<_To, _Np>>(__x._M_data);
  1580	  }
  1581	
  1582	// }}}
  1583	// __convert_x86 declarations {{{
      	// Declarations only, visible when _GLIBCXX_SIMD_WORKAROUND_PR85048 is
      	// defined. The overloads take 1, 2, 4, 8 or 16 arguments of the same type
      	// _Tp and return _To; _Tp must be usable with _VectorTraits.
  1584	#ifdef _GLIBCXX_SIMD_WORKAROUND_PR85048
  1585	template <typename _To, typename _Tp, typename _TVT = _VectorTraits<_Tp>>
  1586	  _To __convert_x86(_Tp);
  1587	
  1588	template <typename _To, typename _Tp, typename _TVT = _VectorTraits<_Tp>>
  1589	  _To __convert_x86(_Tp, _Tp);
  1590	
  1591	template <typename _To, typename _Tp, typename _TVT = _VectorTraits<_Tp>>
  1592	  _To __convert_x86(_Tp, _Tp, _Tp, _Tp);
  1593	
  1594	template <typename _To, typename _Tp, typename _TVT = _VectorTraits<_Tp>>
  1595	  _To __convert_x86(_Tp, _Tp, _Tp, _Tp, _Tp, _Tp, _Tp, _Tp);
  1596	
  1597	template <typename _To, typename _Tp, typename _TVT = _VectorTraits<_Tp>>
  1598	  _To __convert_x86(_Tp, _Tp, _Tp, _Tp, _Tp, _Tp, _Tp, _Tp, _Tp, _Tp, _Tp, _Tp,
  1599			    _Tp, _Tp, _Tp, _Tp);
  1600	#endif // _GLIBCXX_SIMD_WORKAROUND_PR85048
  1601	
  1602	//}}}
  1603	// __bit_cast {{{
      	// Reinterprets the object representation of __x as _To; both types must
      	// have the same size (static_assert). Arithmetic and enum types are routed
      	// through builtin vector types of their own size so that reinterpret_cast
      	// can be used; any other combination falls back to memcpy.
  1604	template <typename _To, typename _From>
  1605	  _GLIBCXX_SIMD_INTRINSIC constexpr _To
  1606	  __bit_cast(const _From __x)
  1607	  {
  1608	    // TODO: implement with / replace by __builtin_bit_cast ASAP
  1609	    static_assert(sizeof(_To) == sizeof(_From));
  1610	    constexpr bool __to_is_vectorizable
  1611	      = is_arithmetic_v<_To> || is_enum_v<_To>;
  1612	    constexpr bool __from_is_vectorizable
  1613	      = is_arithmetic_v<_From> || is_enum_v<_From>;
      	    // vector -> vector
  1614	    if constexpr (__is_vector_type_v<_To> && __is_vector_type_v<_From>)
  1615	      return reinterpret_cast<_To>(__x);
      	    // scalar -> vector
  1616	    else if constexpr (__is_vector_type_v<_To> && __from_is_vectorizable)
  1617	      {
  1618		using _FV [[gnu::vector_size(sizeof(_From))]] = _From;
  1619		return reinterpret_cast<_To>(_FV{__x});
  1620	      }
      	    // scalar -> scalar
  1621	    else if constexpr (__to_is_vectorizable && __from_is_vectorizable)
  1622	      {
  1623		using _TV [[gnu::vector_size(sizeof(_To))]] = _To;
  1624		using _FV [[gnu::vector_size(sizeof(_From))]] = _From;
  1625		return reinterpret_cast<_TV>(_FV{__x})[0];
  1626	      }
      	    // vector -> scalar
  1627	    else if constexpr (__to_is_vectorizable && __is_vector_type_v<_From>)
  1628	      {
  1629		using _TV [[gnu::vector_size(sizeof(_To))]] = _To;
  1630		return reinterpret_cast<_TV>(__x)[0];
  1631	      }
  1632	    else
  1633	      {
  1634		_To __r;
  1635		__builtin_memcpy(reinterpret_cast<char*>(&__r),
  1636				 reinterpret_cast<const char*>(&__x), sizeof(_To));
  1637		return __r;
  1638	      }
  1639	  }
  1640	
  1641	// }}}
  1642	// __to_intrin {{{
      	// Converts __x to the intrinsic type _R for its element type and full
      	// element count. __x must not be larger than _R (static_assert). If __x is
      	// smaller, its bits are placed in the first element of an integer vector
      	// of _R's size, which is then reinterpreted as _R.
  1643	template <typename _Tp, typename _TVT = _VectorTraits<_Tp>,
  1644		  typename _R = __intrinsic_type_t<typename _TVT::value_type, _TVT::_S_full_size>>
  1645	  _GLIBCXX_SIMD_INTRINSIC constexpr _R
  1646	  __to_intrin(_Tp __x)
  1647	  {
  1648	    static_assert(sizeof(__x) <= sizeof(_R),
  1649			  "__to_intrin may never drop values off the end");
  1650	    if constexpr (sizeof(__x) == sizeof(_R))
  1651	      return reinterpret_cast<_R>(__as_vector(__x));
  1652	    else
  1653	      {
  1654		using _Up = __int_for_sizeof_t<_Tp>;
  1655		return reinterpret_cast<_R>(
  1656		  __vector_type_t<_Up, sizeof(_R) / sizeof(_Up)>{__bit_cast<_Up>(__x)});
  1657	      }
  1658	  }
  1659	
  1660	// }}}
  1661	// __make_vector{{{
      	// Builds a vector of _Tp with one element per argument, each argument
      	// converted with static_cast<_Tp>.
  1662	template <typename _Tp, typename... _Args>
  1663	  _GLIBCXX_SIMD_INTRINSIC constexpr __vector_type_t<_Tp, sizeof...(_Args)>
  1664	  __make_vector(const _Args&... __args)
  1665	  { return __vector_type_t<_Tp, sizeof...(_Args)>{static_cast<_Tp>(__args)...}; }
  1666	
  1667	// }}}
  1668	// __vector_broadcast{{{
      	// Helper: expands __x once per index in _I; the indices themselves are
      	// discarded via the comma operator.
  1669	template <size_t _Np, typename _Tp, size_t... _I>
  1670	  _GLIBCXX_SIMD_INTRINSIC constexpr __vector_type_t<_Tp, _Np>
  1671	  __vector_broadcast_impl(_Tp __x, index_sequence<_I...>)
  1672	  { return __vector_type_t<_Tp, _Np>{((void)_I, __x)...}; }
  1673	
      	// Returns a vector whose first _Np elements are all initialized with __x.
  1674	template <size_t _Np, typename _Tp>
  1675	  _GLIBCXX_SIMD_INTRINSIC constexpr __vector_type_t<_Tp, _Np>
  1676	  __vector_broadcast(_Tp __x)
  1677	  { return __vector_broadcast_impl<_Np, _Tp>(__x, make_index_sequence<_Np>()); }
  1678	
  1679	// }}}
  1680	// __generate_vector{{{
      	// Helper: element i is static_cast<_Tp>(__gen(_SizeConstant<i>())) for each
      	// index i in _I.
  1681	  template <typename _Tp, size_t _Np, typename _Gp, size_t... _I>
  1682	  _GLIBCXX_SIMD_INTRINSIC constexpr __vector_type_t<_Tp, _Np>
  1683	  __generate_vector_impl(_Gp&& __gen, index_sequence<_I...>)
  1684	  { return __vector_type_t<_Tp, _Np>{ static_cast<_Tp>(__gen(_SizeConstant<_I>()))...}; }
  1685	
      	// Generates a _V from __gen. Builtin vector types are filled over their
      	// full size; for other types the traits' _S_partial_width gives the number
      	// of generated elements.
  1686	template <typename _V, typename _VVT = _VectorTraits<_V>, typename _Gp>
  1687	  _GLIBCXX_SIMD_INTRINSIC constexpr _V
  1688	  __generate_vector(_Gp&& __gen)
  1689	  {
  1690	    if constexpr (__is_vector_type_v<_V>)
  1691	      return __generate_vector_impl<typename _VVT::value_type,
  1692					    _VVT::_S_full_size>(
  1693		static_cast<_Gp&&>(__gen), make_index_sequence<_VVT::_S_full_size>());
  1694	    else
  1695	      return __generate_vector_impl<typename _VVT::value_type,
  1696					    _VVT::_S_partial_width>(
  1697		static_cast<_Gp&&>(__gen),
  1698		make_index_sequence<_VVT::_S_partial_width>());
  1699	  }
  1700	
      	// Overload with explicit element type and count.
  1701	template <typename _Tp, size_t _Np, typename _Gp>
  1702	  _GLIBCXX_SIMD_INTRINSIC constexpr __vector_type_t<_Tp, _Np>
  1703	  __generate_vector(_Gp&& __gen)
  1704	  {
  1705	    return __generate_vector_impl<_Tp, _Np>(static_cast<_Gp&&>(__gen),
  1706						    make_index_sequence<_Np>());
  1707	  }
  1708	
  1709	// }}}
  1710	// __xor{{{
      	// Bitwise XOR for builtin vectors, _SimdWrapper objects and any other type
      	// that supports operator^. Floating-point element types are bit-cast to the
      	// unsigned integer of the same size, combined, and cast back; wrappers
      	// with other element types operate on their _M_data members.
  1711	template <typename _TW>
  1712	  _GLIBCXX_SIMD_INTRINSIC constexpr _TW
  1713	  __xor(_TW __a, _TW __b) noexcept
  1714	  {
  1715	    if constexpr (__is_vector_type_v<_TW> || __is_simd_wrapper_v<_TW>)
  1716	      {
      		// value_type comes from the wrapper itself or from the vector traits.
  1717		using _Tp = typename conditional_t<__is_simd_wrapper_v<_TW>, _TW,
  1718						   _VectorTraitsImpl<_TW>>::value_type;
  1719		if constexpr (is_floating_point_v<_Tp>)
  1720		  {
  1721		    using _Ip = make_unsigned_t<__int_for_sizeof_t<_Tp>>;
  1722		    return __vector_bitcast<_Tp>(__vector_bitcast<_Ip>(__a)
  1723						 ^ __vector_bitcast<_Ip>(__b));
  1724		  }
  1725		else if constexpr (__is_vector_type_v<_TW>)
  1726		  return __a ^ __b;
  1727		else
  1728		  return __a._M_data ^ __b._M_data;
  1729	      }
  1730	    else
  1731	      return __a ^ __b;
  1732	  }
  1733	
  1734	// }}}
  1735	// __or{{{
      	// Bitwise OR for builtin vectors, _SimdWrapper objects and any other type
      	// that supports operator|. Floating-point element types are bit-cast to the
      	// unsigned integer of the same size, combined, and cast back; wrappers
      	// with other element types operate on their _M_data members.
  1736	template <typename _TW>
  1737	  _GLIBCXX_SIMD_INTRINSIC constexpr _TW
  1738	  __or(_TW __a, _TW __b) noexcept
  1739	  {
  1740	    if constexpr (__is_vector_type_v<_TW> || __is_simd_wrapper_v<_TW>)
  1741	      {
      		// value_type comes from the wrapper itself or from the vector traits.
  1742		using _Tp = typename conditional_t<__is_simd_wrapper_v<_TW>, _TW,
  1743						   _VectorTraitsImpl<_TW>>::value_type;
  1744		if constexpr (is_floating_point_v<_Tp>)
  1745		  {
  1746		    using _Ip = make_unsigned_t<__int_for_sizeof_t<_Tp>>;
  1747		    return __vector_bitcast<_Tp>(__vector_bitcast<_Ip>(__a)
  1748						 | __vector_bitcast<_Ip>(__b));
  1749		  }
  1750		else if constexpr (__is_vector_type_v<_TW>)
  1751		  return __a | __b;
  1752		else
  1753		  return __a._M_data | __b._M_data;
  1754	      }
  1755	    else
  1756	      return __a | __b;
  1757	  }
  1758	
  1759	// }}}
  1760	// __and{{{
      	// Bitwise AND for builtin vectors, _SimdWrapper objects and any other type
      	// that supports operator&. Floating-point element types are bit-cast to the
      	// unsigned integer of the same size, combined, and cast back; wrappers
      	// with other element types operate on their _M_data members.
  1761	template <typename _TW>
  1762	  _GLIBCXX_SIMD_INTRINSIC constexpr _TW
  1763	  __and(_TW __a, _TW __b) noexcept
  1764	  {
  1765	    if constexpr (__is_vector_type_v<_TW> || __is_simd_wrapper_v<_TW>)
  1766	      {
      		// value_type comes from the wrapper itself or from the vector traits.
  1767		using _Tp = typename conditional_t<__is_simd_wrapper_v<_TW>, _TW,
  1768						   _VectorTraitsImpl<_TW>>::value_type;
  1769		if constexpr (is_floating_point_v<_Tp>)
  1770		  {
  1771		    using _Ip = make_unsigned_t<__int_for_sizeof_t<_Tp>>;
  1772		    return __vector_bitcast<_Tp>(__vector_bitcast<_Ip>(__a)
  1773						 & __vector_bitcast<_Ip>(__b));
  1774		  }
  1775		else if constexpr (__is_vector_type_v<_TW>)
  1776		  return __a & __b;
  1777		else
  1778		  return __a._M_data & __b._M_data;
  1779	      }
  1780	    else
  1781	      return __a & __b;
  1782	  }
  1783	
  1784	// }}}
  1785	// __andnot{{{
      	// _S_x86_andnot: function object (x86 only, not compiled with clang) with
      	// one call operator per x86 vector type. Each overload forwards its two
      	// operands, in order, to the and-not builtin/intrinsic of matching width:
      	//   16 bytes: __v4sf, __v2df, __v2di
      	//   32 bytes: __v8sf, __v4df, __v4di
      	//   64 bytes: __v16sf, __v8df, __v8di
      	// It is used by __andnot below as the non-constant-evaluated code path.
  1786	#if _GLIBCXX_SIMD_X86INTRIN && !defined __clang__
  1787	static constexpr struct
  1788	{
  1789	  _GLIBCXX_SIMD_INTRINSIC __v4sf
  1790	  operator()(__v4sf __a, __v4sf __b) const noexcept
  1791	  { return __builtin_ia32_andnps(__a, __b); }
  1792	
  1793	  _GLIBCXX_SIMD_INTRINSIC __v2df
  1794	  operator()(__v2df __a, __v2df __b) const noexcept
  1795	  { return __builtin_ia32_andnpd(__a, __b); }
  1796	
  1797	  _GLIBCXX_SIMD_INTRINSIC __v2di
  1798	  operator()(__v2di __a, __v2di __b) const noexcept
  1799	  { return __builtin_ia32_pandn128(__a, __b); }
  1800	
  1801	  _GLIBCXX_SIMD_INTRINSIC __v8sf
  1802	  operator()(__v8sf __a, __v8sf __b) const noexcept
  1803	  { return __builtin_ia32_andnps256(__a, __b); }
  1804	
  1805	  _GLIBCXX_SIMD_INTRINSIC __v4df
  1806	  operator()(__v4df __a, __v4df __b) const noexcept
  1807	  { return __builtin_ia32_andnpd256(__a, __b); }
  1808	
  1809	  _GLIBCXX_SIMD_INTRINSIC __v4di
  1810	  operator()(__v4di __a, __v4di __b) const noexcept
  1811	  {
      	    // the 256-bit integer builtin is only used when AVX2 is available;
      	    // otherwise the bits are routed through the 256-bit double builtin
  1812	    if constexpr (__have_avx2)
  1813	      return __builtin_ia32_andnotsi256(__a, __b);
  1814	    else
  1815	      return reinterpret_cast<__v4di>(
  1816		__builtin_ia32_andnpd256(reinterpret_cast<__v4df>(__a),
  1817					 reinterpret_cast<__v4df>(__b)));
  1818	  }
  1819	
  1820	  _GLIBCXX_SIMD_INTRINSIC __v16sf
  1821	  operator()(__v16sf __a, __v16sf __b) const noexcept
  1822	  {
      	    // the 512-bit float intrinsic is only used with AVX512DQ; otherwise
      	    // the bits are routed through the 512-bit integer intrinsic
  1823	    if constexpr (__have_avx512dq)
  1824	      return _mm512_andnot_ps(__a, __b);
  1825	    else
  1826	      return reinterpret_cast<__v16sf>(
  1827		_mm512_andnot_si512(reinterpret_cast<__v8di>(__a),
  1828				    reinterpret_cast<__v8di>(__b)));
  1829	  }
  1830	
  1831	  _GLIBCXX_SIMD_INTRINSIC __v8df
  1832	  operator()(__v8df __a, __v8df __b) const noexcept
  1833	  {
      	    // same AVX512DQ split as the __v16sf overload above
  1834	    if constexpr (__have_avx512dq)
  1835	      return _mm512_andnot_pd(__a, __b);
  1836	    else
  1837	      return reinterpret_cast<__v8df>(
  1838		_mm512_andnot_si512(reinterpret_cast<__v8di>(__a),
  1839				    reinterpret_cast<__v8di>(__b)));
  1840	  }
  1841	
  1842	  _GLIBCXX_SIMD_INTRINSIC __v8di
  1843	  operator()(__v8di __a, __v8di __b) const noexcept
  1844	  { return _mm512_andnot_si512(__a, __b); }
  1845	} _S_x86_andnot;
  1846	#endif // _GLIBCXX_SIMD_X86INTRIN && !__clang__
  1847	
  1848	template <typename _TW>
      	  // Returns ~__a & __b, i.e. the bits of __b that are not set in __a.
      	  //
      	  // For vector builtins and _SimdWrapper objects on x86 (non-clang) with
      	  // sizeof(_TW) >= 16, the operands are converted with __to_intrin and
      	  // handed to _S_x86_andnot, unless the call is constant-evaluated or
      	  // both operands are known constants (__builtin_constant_p). In all
      	  // remaining vector cases the operands are bit-cast to unsigned integer
      	  // vectors, combined as ~a & b, and bit-cast back to _Tp elements.
      	  // Non-vector types use plain ~__a & __b.
  1849	  _GLIBCXX_SIMD_INTRINSIC constexpr _TW
  1850	  __andnot(_TW __a, _TW __b) noexcept
  1851	  {
  1852	    if constexpr (__is_vector_type_v<_TW> || __is_simd_wrapper_v<_TW>)
  1853	      {
  1854		using _TVT = conditional_t<__is_simd_wrapper_v<_TW>, _TW,
  1855					   _VectorTraitsImpl<_TW>>;
  1856		using _Tp = typename _TVT::value_type;
  1857	#if _GLIBCXX_SIMD_X86INTRIN && !defined __clang__
  1858		if constexpr (sizeof(_TW) >= 16)
  1859		  {
  1860		    const auto __ai = __to_intrin(__a);
  1861		    const auto __bi = __to_intrin(__b);
      		    // keep the generic expression below for constant evaluation
      		    // and for operands the compiler already knows as constants
  1862		    if (!__builtin_is_constant_evaluated()
  1863			&& !(__builtin_constant_p(__ai) && __builtin_constant_p(__bi)))
  1864		      {
  1865			const auto __r = _S_x86_andnot(__ai, __bi);
      			// the intrinsic type may differ from _TW; cast back if so
  1866			if constexpr (is_convertible_v<decltype(__r), _TW>)
  1867			  return __r;
  1868			else
  1869			  return reinterpret_cast<typename _TVT::type>(__r);
  1870		      }
  1871		  }
  1872	#endif // _GLIBCXX_SIMD_X86INTRIN
      		// generic path: operate on an unsigned integer view of the bits
  1873		using _Ip = make_unsigned_t<__int_for_sizeof_t<_Tp>>;
  1874		return __vector_bitcast<_Tp>(~__vector_bitcast<_Ip>(__a)
  1875					     & __vector_bitcast<_Ip>(__b));
  1876	      }
  1877	    else
  1878	      return ~__a & __b;
  1879	  }
  1880	
  1881	// }}}
  1882	// __not{{{
      	// Returns the bitwise complement of the vector __a.
      	// Floating-point vectors are bit-cast to a vector of unsigned, inverted,
      	// and reinterpret_cast back to the vector type named by _TVT::type;
      	// all other vectors use operator~ directly.
  1883	template <typename _Tp, typename _TVT = _VectorTraits<_Tp>>
  1884	  _GLIBCXX_SIMD_INTRINSIC constexpr _Tp
  1885	  __not(_Tp __a) noexcept
  1886	  {
  1887	    if constexpr (is_floating_point_v<typename _TVT::value_type>)
  1888	      return reinterpret_cast<typename _TVT::type>(
  1889		~__vector_bitcast<unsigned>(__a));
  1890	    else
  1891	      return ~__a;
  1892	  }
  1893	
  1894	// }}}
  1895	// __concat{{{
      	// Concatenates two vectors of the same type: the result _R holds the
      	// elements of a_ followed by the elements of b_ and has twice
      	// _TVT::_S_full_size elements of the same value_type.
      	//
      	// With _GLIBCXX_SIMD_WORKAROUND_XXX_1 defined, the inputs are first
      	// reinterpreted with a wider element type _W (double for floating-point
      	// inputs, long long for inputs of at least 2 * sizeof(long long) bytes),
      	// so fewer elements have to be listed; the result is cast back to _R.
      	//
      	// NOTE(review): only input widths of 2, 4, 8, 16 and 32 elements are
      	// handled. There is no trailing else branch, so any other width appears
      	// to reach the end of the function without a return - confirm that no
      	// caller instantiates such a case.
  1896	template <typename _Tp, typename _TVT = _VectorTraits<_Tp>,
  1897		  typename _R = __vector_type_t<typename _TVT::value_type, _TVT::_S_full_size * 2>>
  1898	  constexpr _R
  1899	  __concat(_Tp a_, _Tp b_)
  1900	  {
  1901	#ifdef _GLIBCXX_SIMD_WORKAROUND_XXX_1
  1902	    using _W
  1903	      = conditional_t<is_floating_point_v<typename _TVT::value_type>, double,
  1904			      conditional_t<(sizeof(_Tp) >= 2 * sizeof(long long)),
  1905					    long long, typename _TVT::value_type>>;
      	    // number of _W elements per input vector
  1906	    constexpr int input_width = sizeof(_Tp) / sizeof(_W);
  1907	    const auto __a = __vector_bitcast<_W>(a_);
  1908	    const auto __b = __vector_bitcast<_W>(b_);
  1909	    using _Up = __vector_type_t<_W, sizeof(_R) / sizeof(_W)>;
  1910	#else
  1911	    constexpr int input_width = _TVT::_S_full_size;
  1912	    const _Tp& __a = a_;
  1913	    const _Tp& __b = b_;
  1914	    using _Up = _R;
  1915	#endif
      	    // one braced initializer per supported width: all of __a, then all of __b
  1916	    if constexpr (input_width == 2)
  1917	      return reinterpret_cast<_R>(_Up{__a[0], __a[1], __b[0], __b[1]});
  1918	    else if constexpr (input_width == 4)
  1919	      return reinterpret_cast<_R>(
  1920		_Up{__a[0], __a[1], __a[2], __a[3], __b[0], __b[1], __b[2], __b[3]});
  1921	    else if constexpr (input_width == 8)
  1922	      return reinterpret_cast<_R>(
  1923		_Up{__a[0], __a[1], __a[2], __a[3], __a[4], __a[5], __a[6], __a[7],
  1924		    __b[0], __b[1], __b[2], __b[3], __b[4], __b[5], __b[6], __b[7]});
  1925	    else if constexpr (input_width == 16)
  1926	      return reinterpret_cast<_R>(
  1927		_Up{__a[0],  __a[1],  __a[2],  __a[3],  __a[4],  __a[5],  __a[6],
  1928		    __a[7],  __a[8],  __a[9],  __a[10], __a[11], __a[12], __a[13],
  1929		    __a[14], __a[15], __b[0],  __b[1],  __b[2],  __b[3],  __b[4],
  1930		    __b[5],  __b[6],  __b[7],  __b[8],  __b[9],  __b[10], __b[11],
  1931		    __b[12], __b[13], __b[14], __b[15]});
  1932	    else if constexpr (input_width == 32)
  1933	      return reinterpret_cast<_R>(
  1934		_Up{__a[0],  __a[1],  __a[2],  __a[3],  __a[4],  __a[5],  __a[6],
  1935		    __a[7],  __a[8],  __a[9],  __a[10], __a[11], __a[12], __a[13],
  1936		    __a[14], __a[15], __a[16], __a[17], __a[18], __a[19], __a[20],
  1937		    __a[21], __a[22], __a[23], __a[24], __a[25], __a[26], __a[27],
  1938		    __a[28], __a[29], __a[30], __a[31], __b[0],  __b[1],  __b[2],
  1939		    __b[3],  __b[4],  __b[5],  __b[6],  __b[7],  __b[8],  __b[9],
  1940		    __b[10], __b[11], __b[12], __b[13], __b[14], __b[15], __b[16],
  1941		    __b[17], __b[18], __b[19], __b[20], __b[21], __b[22], __b[23],
  1942		    __b[24], __b[25], __b[26], __b[27], __b[28], __b[29], __b[30],
  1943		    __b[31]});
  1944	  }
  1945	
  1946	// }}}
  1947	// __zero_extend {{{
      	// Proxy object returned by __zero_extend. It stores a copy of a vector and
      	// converts implicitly to any vector type _To with the same value_type whose
      	// full element count is 1, 2, 4, 8 or 16 times that of the stored vector.
      	// The stored elements come first; the remaining elements come from
      	// value-initialized vectors (or, with _GLIBCXX_SIMD_WORKAROUND_XXX_3
      	// defined, from inserting into a value-initialized x86 register).
  1948	template <typename _Tp, typename _TVT = _VectorTraits<_Tp>>
  1949	  struct _ZeroExtendProxy
  1950	  {
  1951	    using value_type = typename _TVT::value_type;
      	    // element count of the stored vector
  1952	    static constexpr size_t _Np = _TVT::_S_full_size;
  1953	    const _Tp __x;
  1954	
      	    // Conversion to the wider vector type _To; only participates in
      	    // overload resolution when the element types match.
  1955	    template <typename _To, typename _ToVT = _VectorTraits<_To>,
  1956		      typename
  1957		      = enable_if_t<is_same_v<typename _ToVT::value_type, value_type>>>
  1958	      _GLIBCXX_SIMD_INTRINSIC operator _To() const
  1959	      {
  1960		constexpr size_t _ToN = _ToVT::_S_full_size;
  1961		if constexpr (_ToN == _Np)
  1962		  return __x;
  1963		else if constexpr (_ToN == 2 * _Np)
  1964		  {
  1965	#ifdef _GLIBCXX_SIMD_WORKAROUND_XXX_3
      		    // 16 -> 32 byte and 32 -> 64 byte extension through insert
      		    // intrinsics, selected by ISA availability and element type
  1966		    if constexpr (__have_avx && _TVT::template _S_is<float, 4>)
  1967		      return __vector_bitcast<value_type>(
  1968			_mm256_insertf128_ps(__m256(), __x, 0));
  1969		    else if constexpr (__have_avx && _TVT::template _S_is<double, 2>)
  1970		      return __vector_bitcast<value_type>(
  1971			_mm256_insertf128_pd(__m256d(), __x, 0));
  1972		    else if constexpr (__have_avx2 && _Np * sizeof(value_type) == 16)
  1973		      return __vector_bitcast<value_type>(
  1974			_mm256_insertf128_si256(__m256i(), __to_intrin(__x), 0));
  1975		    else if constexpr (__have_avx512f && _TVT::template _S_is<float, 8>)
  1976		      {
  1977			if constexpr (__have_avx512dq)
  1978			  return __vector_bitcast<value_type>(
  1979			    _mm512_insertf32x8(__m512(), __x, 0));
  1980			else
  1981			  return reinterpret_cast<__m512>(
  1982			    _mm512_insertf64x4(__m512d(),
  1983					       reinterpret_cast<__m256d>(__x), 0));
  1984		      }
  1985		    else if constexpr (__have_avx512f
  1986				       && _TVT::template _S_is<double, 4>)
  1987		      return __vector_bitcast<value_type>(
  1988			_mm512_insertf64x4(__m512d(), __x, 0));
  1989		    else if constexpr (__have_avx512f && _Np * sizeof(value_type) == 32)
  1990		      return __vector_bitcast<value_type>(
  1991			_mm512_inserti64x4(__m512i(), __to_intrin(__x), 0));
  1992	#endif
      		    // generic: append one value-initialized vector of equal size
  1993		    return __concat(__x, _Tp());
  1994		  }
  1995		else if constexpr (_ToN == 4 * _Np)
  1996		  {
  1997	#ifdef _GLIBCXX_SIMD_WORKAROUND_XXX_3
      		    // 16 -> 64 byte extension through AVX-512 insert intrinsics
  1998		    if constexpr (__have_avx512dq && _TVT::template _S_is<double, 2>)
  1999		      {
  2000			return __vector_bitcast<value_type>(
  2001			  _mm512_insertf64x2(__m512d(), __x, 0));
  2002		      }
  2003		    else if constexpr (__have_avx512f
  2004				       && is_floating_point_v<value_type>)
  2005		      {
  2006			return __vector_bitcast<value_type>(
  2007			  _mm512_insertf32x4(__m512(), reinterpret_cast<__m128>(__x),
  2008					     0));
  2009		      }
  2010		    else if constexpr (__have_avx512f && _Np * sizeof(value_type) == 16)
  2011		      {
  2012			return __vector_bitcast<value_type>(
  2013			  _mm512_inserti32x4(__m512i(), __to_intrin(__x), 0));
  2014		      }
  2015	#endif
      		    // generic: double the width twice
  2016		    return __concat(__concat(__x, _Tp()),
  2017				    __vector_type_t<value_type, _Np * 2>());
  2018		  }
      		// 8x and 16x: reuse the next smaller conversion and double it once more
  2019		else if constexpr (_ToN == 8 * _Np)
  2020		  return __concat(operator __vector_type_t<value_type, _Np * 4>(),
  2021				  __vector_type_t<value_type, _Np * 4>());
  2022		else if constexpr (_ToN == 16 * _Np)
  2023		  return __concat(operator __vector_type_t<value_type, _Np * 8>(),
  2024				  __vector_type_t<value_type, _Np * 8>());
  2025		else
  2026		  __assert_unreachable<_Tp>();
  2027	      }
  2028	  };
  2029	
  2030	template <typename _Tp, typename _TVT = _VectorTraits<_Tp>>
      	  // Wraps __x in a _ZeroExtendProxy; the actual widening happens when the
      	  // proxy is converted to the destination vector type.
  2031	  _GLIBCXX_SIMD_INTRINSIC _ZeroExtendProxy<_Tp, _TVT>
  2032	  __zero_extend(_Tp __x)
  2033	  { return {__x}; }
  2034	
  2035	// }}}
  2036	// __extract<_Np, By>{{{
      	// Splits the vector __in into _SplitBy parts of equal size and returns the
      	// part with index _Offset. The result type _R has
      	// _TVT::_S_full_size / _SplitBy elements of the input's value_type.
      	//
      	// On x86, a 64-byte input split by 4 with _Offset > 0 uses the AVX-512
      	// extract intrinsics. Every other case builds the result from individual
      	// subscripts, starting at element _Offset * __return_width
      	// (presumably __call_with_subscripts passes __return_width consecutive
      	// elements from that index to the lambda - see its definition).
  2037	template <int _Offset,
  2038		  int _SplitBy,
  2039		  typename _Tp,
  2040		  typename _TVT = _VectorTraits<_Tp>,
  2041		  typename _R = __vector_type_t<typename _TVT::value_type, _TVT::_S_full_size / _SplitBy>>
  2042	  _GLIBCXX_SIMD_INTRINSIC constexpr _R
  2043	  __extract(_Tp __in)
  2044	  {
  2045	    using value_type = typename _TVT::value_type;
  2046	#if _GLIBCXX_SIMD_X86INTRIN // {{{
  2047	    if constexpr (sizeof(_Tp) == 64 && _SplitBy == 4 && _Offset > 0)
  2048	      {
  2049		if constexpr (__have_avx512dq && is_same_v<double, value_type>)
  2050		  return _mm512_extractf64x2_pd(__to_intrin(__in), _Offset);
  2051		else if constexpr (is_floating_point_v<value_type>)
  2052		  return __vector_bitcast<value_type>(
  2053		    _mm512_extractf32x4_ps(__intrin_bitcast<__m512>(__in), _Offset));
  2054		else
  2055		  return reinterpret_cast<_R>(
  2056		    _mm512_extracti32x4_epi32(__intrin_bitcast<__m512i>(__in),
  2057					      _Offset));
  2058	      }
  2059	    else
  2060	#endif // _GLIBCXX_SIMD_X86INTRIN }}}
  2061	      {
  2062	#ifdef _GLIBCXX_SIMD_WORKAROUND_XXX_1
      		// view the data with wider elements (_W) so fewer subscripts are needed
  2063		using _W = conditional_t<
  2064		  is_floating_point_v<value_type>, double,
  2065		  conditional_t<(sizeof(_R) >= 16), long long, value_type>>;
  2066		static_assert(sizeof(_R) % sizeof(_W) == 0);
  2067		constexpr int __return_width = sizeof(_R) / sizeof(_W);
  2068		using _Up = __vector_type_t<_W, __return_width>;
  2069		const auto __x = __vector_bitcast<_W>(__in);
  2070	#else
  2071	      constexpr int __return_width = _TVT::_S_full_size / _SplitBy;
  2072	      using _Up = _R;
  2073	      const __vector_type_t<value_type, _TVT::_S_full_size>& __x
  2074		= __in; // only needed for _Tp = _SimdWrapper<value_type, _Np>
  2075	#endif
      		// index of the first element of the requested part
  2076		constexpr int _O = _Offset * __return_width;
  2077		return __call_with_subscripts<__return_width, _O>(
  2078		  __x, [](auto... __entries) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
  2079		    return reinterpret_cast<_R>(_Up{__entries...});
  2080		  });
  2081	      }
  2082	  }
  2083	
  2084	// }}}
  2085	// __lo/__hi64[z]{{{
      	// __lo64: returns the first 8 bytes of __x as an object of type _R
      	// (by default __vector_type8_t of the input's value_type). The bytes are
      	// copied with __builtin_memcpy into a value-initialized result.
  2086	template <typename _Tp,
  2087		  typename _R = __vector_type8_t<typename _VectorTraits<_Tp>::value_type>>
  2088	  _GLIBCXX_SIMD_INTRINSIC constexpr _R
  2089	  __lo64(_Tp __x)
  2090	  {
  2091	    _R __r{};
  2092	    __builtin_memcpy(&__r, &__x, 8);
  2093	    return __r;
  2094	  }
  2095	
  2096	template <typename _Tp,
  2097		  typename _R = __vector_type8_t<typename _VectorTraits<_Tp>::value_type>>
      	  // Returns bytes 8..15 of a 16-byte object __x as an object of type _R.
      	  // Only 16-byte inputs are accepted (static_assert); __hi64z is the
      	  // variant that also accepts other sizes.
  2098	  _GLIBCXX_SIMD_INTRINSIC constexpr _R
  2099	  __hi64(_Tp __x)
  2100	  {
  2101	    static_assert(sizeof(_Tp) == 16, "use __hi64z if you meant it");
  2102	    _R __r{};
      	    // copy starting at byte offset 8 of the input
  2103	    __builtin_memcpy(&__r, reinterpret_cast<const char*>(&__x) + 8, 8);
  2104	    return __r;
  2105	  }
  2106	
  2107	template <typename _Tp,
  2108		  typename _R = __vector_type8_t<typename _VectorTraits<_Tp>::value_type>>
      	  // Like __hi64, but accepts any input size: for a 16-byte __x it returns
      	  // bytes 8..15, for every other size it returns a value-initialized _R
      	  // (__x is then unused, hence [[maybe_unused]]).
  2109	  _GLIBCXX_SIMD_INTRINSIC constexpr _R
  2110	  __hi64z([[maybe_unused]] _Tp __x)
  2111	  {
  2112	    _R __r{};
  2113	    if constexpr (sizeof(_Tp) == 16)
  2114	      __builtin_memcpy(&__r, reinterpret_cast<const char*>(&__x) + 8, 8);
  2115	    return __r;
  2116	  }
  2117	
  2118	// }}}
  2119	// __lo/__hi128{{{
      	// __lo128: returns the first 16 bytes of __x by extracting part 0 of
      	// sizeof(_Tp) / 16 equally sized parts.
  2120	template <typename _Tp>
  2121	  _GLIBCXX_SIMD_INTRINSIC constexpr auto
  2122	  __lo128(_Tp __x)
  2123	  { return __extract<0, sizeof(_Tp) / 16>(__x); }
  2124	
  2125	template <typename _Tp>
      	  // Returns the second half (part 1 of 2) of a 32-byte object __x.
      	  // Other input sizes are rejected by the static_assert.
  2126	  _GLIBCXX_SIMD_INTRINSIC constexpr auto
  2127	  __hi128(_Tp __x)
  2128	  {
  2129	    static_assert(sizeof(__x) == 32);
  2130	    return __extract<1, 2>(__x);
  2131	  }
  2132	
  2133	// }}}
  2134	// __lo/__hi256{{{
      	// __lo256: returns the first half (part 0 of 2) of a 64-byte object __x.
      	// Other input sizes are rejected by the static_assert.
  2135	template <typename _Tp>
  2136	  _GLIBCXX_SIMD_INTRINSIC constexpr auto
  2137	  __lo256(_Tp __x)
  2138	  {
  2139	    static_assert(sizeof(__x) == 64);
  2140	    return __extract<0, 2>(__x);
  2141	  }
  2142	
  2143	template <typename _Tp>
      	  // Returns the second half (part 1 of 2) of a 64-byte object __x.
      	  // Other input sizes are rejected by the static_assert.
  2144	  _GLIBCXX_SIMD_INTRINSIC constexpr auto
  2145	  __hi256(_Tp __x)
  2146	  {
  2147	    static_assert(sizeof(__x) == 64);
  2148	    return __extract<1, 2>(__x);
  2149	  }
  2150	
  2151	// }}}
  2152	// __auto_bitcast{{{
      	// Proxy object returned by __auto_bitcast. It stores a copy of a vector
      	// builtin (enforced by the static_assert) and converts implicitly to any
      	// type _Up for which _VectorTraits<_Up> exists, by passing the stored
      	// vector through __intrin_bitcast to _UVT::type.
  2153	template <typename _Tp>
  2154	  struct _AutoCast
  2155	  {
  2156	    static_assert(__is_vector_type_v<_Tp>);
  2157	
  2158	    const _Tp __x;
  2159	
      	    // the destination type is deduced from the conversion context
  2160	    template <typename _Up, typename _UVT = _VectorTraits<_Up>>
  2161	      _GLIBCXX_SIMD_INTRINSIC constexpr operator _Up() const
  2162	      { return __intrin_bitcast<typename _UVT::type>(__x); }
  2163	  };
  2164	
  2165	template <typename _Tp>
      	  // Wraps the vector __x in an _AutoCast proxy; the bit-cast happens when
      	  // the proxy is converted to the destination type.
  2166	  _GLIBCXX_SIMD_INTRINSIC constexpr _AutoCast<_Tp>
  2167	  __auto_bitcast(const _Tp& __x)
  2168	  { return {__x}; }
  2169	
  2170	template <typename _Tp, size_t _Np>
      	  // Overload for _SimdWrapper arguments: the proxy is built from the
      	  // wrapped builtin vector (_M_data) rather than from the wrapper itself.
  2171	  _GLIBCXX_SIMD_INTRINSIC constexpr
  2172	  _AutoCast<typename _SimdWrapper<_Tp, _Np>::_BuiltinType>
  2173	  __auto_bitcast(const _SimdWrapper<_Tp, _Np>& __x)
  2174	  { return {__x._M_data}; }
  2175	
  2176	// }}}
  2177	// ^^^ ---- builtin vector types [[gnu::vector_size(N)]] and operations ---- ^^^
  2178	
  2179	#if _GLIBCXX_SIMD_HAVE_SSE_ABI
  2180	// __bool_storage_member_type{{{
      	// Maps a number of mask bits (_Size) to the type that stores them.
      	// Only available with AVX-512F and the x86 intrinsics. The power-of-2
      	// sizes 1..64 are explicit specializations (bool for 1, __mmask8 for
      	// 2/4/8, then __mmask16/32/64); the primary template handles the
      	// remaining sizes by rounding up to the next power of 2.
  2181	#if _GLIBCXX_SIMD_HAVE_AVX512F && _GLIBCXX_SIMD_X86INTRIN
  2182	template <size_t _Size>
  2183	  struct __bool_storage_member_type
  2184	  {
      	    // (_Size & (_Size - 1)) != 0 holds exactly for non-powers of 2 (and 0
      	    // is excluded as well); a power of 2 without specialization lands here
      	    // and fails the assertion
  2185	    static_assert((_Size & (_Size - 1)) != 0,
  2186			  "This trait may only be used for non-power-of-2 sizes. "
  2187			  "Power-of-2 sizes must be specialized.");
  2188	    using type =
  2189	      typename __bool_storage_member_type<std::__bit_ceil(_Size)>::type;
  2190	  };
  2191	
  2192	template <>
  2193	  struct __bool_storage_member_type<1> { using type = bool; };
  2194	
  2195	template <>
  2196	  struct __bool_storage_member_type<2> { using type = __mmask8; };
  2197	
  2198	template <>
  2199	  struct __bool_storage_member_type<4> { using type = __mmask8; };
  2200	
  2201	template <>
  2202	  struct __bool_storage_member_type<8> { using type = __mmask8; };
  2203	
  2204	template <>
  2205	  struct __bool_storage_member_type<16> { using type = __mmask16; };
  2206	
  2207	template <>
  2208	  struct __bool_storage_member_type<32> { using type = __mmask32; };
  2209	
  2210	template <>
  2211	  struct __bool_storage_member_type<64> { using type = __mmask64; };
  2212	#endif // _GLIBCXX_SIMD_HAVE_AVX512F
  2213	
  2214	// }}}
  2215	// __intrinsic_type (x86){{{
      	// Partial specialization selecting the x86 intrinsic vector type for a
      	// vectorizable element type _Tp and a size of up to 64 bytes.
      	// The byte size is rounded up to 16, 32 or 64. Integral element types all
      	// map to a vector of long long int; other element types keep _Tp as the
      	// vector element. long double is rejected by the static_assert.
  2216	// the following excludes bool via __is_vectorizable
  2217	#if _GLIBCXX_SIMD_HAVE_SSE
  2218	template <typename _Tp, size_t _Bytes>
  2219	  struct __intrinsic_type<_Tp, _Bytes, enable_if_t<__is_vectorizable_v<_Tp> && _Bytes <= 64>>
  2220	  {
  2221	    static_assert(!is_same_v<_Tp, long double>,
  2222			  "no __intrinsic_type support for long double on x86");
  2223	
      	    // size in bytes of the vector type actually used
  2224	    static constexpr size_t _S_VBytes = _Bytes <= 16 ? 16 : _Bytes <= 32 ? 32 : 64;
  2225	
  2226	    using type [[__gnu__::__vector_size__(_S_VBytes)]]
  2227	      = conditional_t<is_integral_v<_Tp>, long long int, _Tp>;
  2228	  };
  2229	#endif // _GLIBCXX_SIMD_HAVE_SSE
  2230	
  2231	// }}}
  2232	#endif // _GLIBCXX_SIMD_HAVE_SSE_ABI
  2233	// __intrinsic_type (ARM){{{
  2234	#if _GLIBCXX_SIMD_HAVE_NEON
  2235	template <>
      	  // Explicit NEON specializations for floating-point elements.
      	  // float maps to float32x2_t (8 bytes) and float32x4_t (16 bytes).
      	  // The double specializations only define a member `type` when
      	  // _GLIBCXX_SIMD_HAVE_NEON_A64 is set; otherwise they are empty.
  2236	  struct __intrinsic_type<float, 8, void>
  2237	  { using type = float32x2_t; };
  2238	
  2239	template <>
  2240	  struct __intrinsic_type<float, 16, void>
  2241	  { using type = float32x4_t; };
  2242	
  2243	template <>
  2244	  struct __intrinsic_type<double, 8, void>
  2245	  {
  2246	#if _GLIBCXX_SIMD_HAVE_NEON_A64
  2247	   using type = float64x1_t;
  2248	#endif
  2249	  };
  2250	
  2251	template <>
  2252	  struct __intrinsic_type<double, 16, void>
  2253	  {
  2254	#if _GLIBCXX_SIMD_HAVE_NEON_A64
  2255	    using type = float64x2_t;
  2256	#endif
  2257	  };
  2258	
  2259	#define _GLIBCXX_SIMD_ARM_INTRIN(_Bits, _Np)                                   \
  2260	template <>                                                                    \
  2261	  struct __intrinsic_type<__int_with_sizeof_t<_Bits / 8>,                      \
  2262				  _Np * _Bits / 8, void>                               \
  2263	  { using type = int##_Bits##x##_Np##_t; };                                    \
  2264	template <>                                                                    \
  2265	  struct __intrinsic_type<make_unsigned_t<__int_with_sizeof_t<_Bits / 8>>,     \
  2266				  _Np * _Bits / 8, void>                               \
  2267	  { using type = uint##_Bits##x##_Np##_t; }
      	// Each expansion defines two explicit specializations, one for the signed
      	// and one for the unsigned integer with _Bits bits, mapping a vector of
      	// _Np such elements to the NEON types int<_Bits>x<_Np>_t and
      	// uint<_Bits>x<_Np>_t. The list covers all 8- and 16-byte combinations.
  2268	_GLIBCXX_SIMD_ARM_INTRIN(8, 8);
  2269	_GLIBCXX_SIMD_ARM_INTRIN(8, 16);
  2270	_GLIBCXX_SIMD_ARM_INTRIN(16, 4);
  2271	_GLIBCXX_SIMD_ARM_INTRIN(16, 8);
  2272	_GLIBCXX_SIMD_ARM_INTRIN(32, 2);
  2273	_GLIBCXX_SIMD_ARM_INTRIN(32, 4);
  2274	_GLIBCXX_SIMD_ARM_INTRIN(64, 1);
  2275	_GLIBCXX_SIMD_ARM_INTRIN(64, 2);
  2276	#undef _GLIBCXX_SIMD_ARM_INTRIN
  2277	
  2278	template <typename _Tp, size_t _Bytes>
      	  // Generic NEON case for vectorizable _Tp and sizes up to 16 bytes.
      	  // It normalizes the request and forwards to one of the explicit
      	  // specializations: the byte size is rounded up to 8 or 16, and integral
      	  // element types are replaced by the integer type chosen via
      	  // __int_for_sizeof_t (made unsigned for unsigned _Tp).
  2279	  struct __intrinsic_type<_Tp, _Bytes, enable_if_t<__is_vectorizable_v<_Tp> && _Bytes <= 16>>
  2280	  {
  2281	    static constexpr int _SVecBytes = _Bytes <= 8 ? 8 : 16;
  2282	
  2283	    using _Ip = __int_for_sizeof_t<_Tp>;
  2284	
      	    // canonical element type: floating-point types are kept as they are
  2285	    using _Up = conditional_t<
  2286	      is_floating_point_v<_Tp>, _Tp,
  2287	      conditional_t<is_unsigned_v<_Tp>, make_unsigned_t<_Ip>, _Ip>>;
  2288	
      	    // an already normalized request must have matched an explicit
      	    // specialization; reaching this point would otherwise recurse
  2289	    static_assert(!is_same_v<_Tp, _Up> || _SVecBytes != _Bytes,
  2290			  "should use explicit specialization above");
  2291	
  2292	    using type = typename __intrinsic_type<_Up, _SVecBytes>::type;
  2293	  };
  2294	#endif // _GLIBCXX_SIMD_HAVE_NEON
  2295	
  2296	// }}}
  2297	// __intrinsic_type (PPC){{{
  2298	#ifdef __ALTIVEC__
  2299	template <typename _Tp>
      	  // Maps an element type to the corresponding AltiVec `__vector _Tp` type.
      	  // Only the specializations generated by _GLIBCXX_SIMD_PPC_INTRIN below
      	  // are defined: float and the 8/16/32-bit integers always, double and
      	  // (unsigned) long long only with VSX, (unsigned) long with VSX or when
      	  // long is 4 bytes wide.
  2300	  struct __intrinsic_type_impl;
  2301	
  2302	#define _GLIBCXX_SIMD_PPC_INTRIN(_Tp)                                          \
  2303	  template <>                                                                  \
  2304	    struct __intrinsic_type_impl<_Tp> { using type = __vector _Tp; }
  2305	_GLIBCXX_SIMD_PPC_INTRIN(float);
  2306	#ifdef __VSX__
  2307	_GLIBCXX_SIMD_PPC_INTRIN(double);
  2308	#endif
  2309	_GLIBCXX_SIMD_PPC_INTRIN(signed char);
  2310	_GLIBCXX_SIMD_PPC_INTRIN(unsigned char);
  2311	_GLIBCXX_SIMD_PPC_INTRIN(signed short);
  2312	_GLIBCXX_SIMD_PPC_INTRIN(unsigned short);
  2313	_GLIBCXX_SIMD_PPC_INTRIN(signed int);
  2314	_GLIBCXX_SIMD_PPC_INTRIN(unsigned int);
  2315	#if defined __VSX__ || __SIZEOF_LONG__ == 4
  2316	_GLIBCXX_SIMD_PPC_INTRIN(signed long);
  2317	_GLIBCXX_SIMD_PPC_INTRIN(unsigned long);
  2318	#endif
  2319	#ifdef __VSX__
  2320	_GLIBCXX_SIMD_PPC_INTRIN(signed long long);
  2321	_GLIBCXX_SIMD_PPC_INTRIN(unsigned long long);
  2322	#endif
  2323	#undef _GLIBCXX_SIMD_PPC_INTRIN
  2324	
  2325	template <typename _Tp, size_t _Bytes>
      	  // PowerPC (AltiVec) case for vectorizable _Tp and sizes up to 16 bytes.
      	  // The element type is first mapped to a canonical type by
      	  // __element_type() and then looked up in __intrinsic_type_impl.
  2326	  struct __intrinsic_type<_Tp, _Bytes, enable_if_t<__is_vectorizable_v<_Tp> && _Bytes <= 16>>
  2327	  {
  2328	    static constexpr bool _S_is_ldouble = is_same_v<_Tp, long double>;
  2329	
  2330	    // allow _Tp == long double with -mlong-double-64
  2331	    static_assert(!(_S_is_ldouble && sizeof(long double) > sizeof(double)),
  2332			  "no __intrinsic_type support for 128-bit floating point on PowerPC");
  2333	
  2334	#ifndef __VSX__
  2335	    static_assert(!(is_same_v<_Tp, double>
  2336			    || (_S_is_ldouble && sizeof(long double) == sizeof(double))),
  2337			  "no __intrinsic_type support for 64-bit floating point on PowerPC w/o VSX");
  2338	#endif
  2339	
      	    // Returns a value whose type is the canonical element type for _Tp;
      	    // only the return type is used (via decltype below). long double
      	    // becomes double, other floating-point types stay unchanged, and
      	    // integers are replaced by the signed/unsigned char, short, int or
      	    // long long type of equal size.
  2340	    static constexpr auto __element_type()
  2341	    {
  2342	      if constexpr (is_floating_point_v<_Tp>)
  2343		{
  2344		  if constexpr (_S_is_ldouble)
  2345		    return double {};
  2346		  else
  2347		    return _Tp {};
  2348		}
  2349	      else if constexpr (is_signed_v<_Tp>)
  2350		{
  2351		  if constexpr (sizeof(_Tp) == sizeof(_SChar))
  2352		    return _SChar {};
  2353		  else if constexpr (sizeof(_Tp) == sizeof(short))
  2354		    return short {};
  2355		  else if constexpr (sizeof(_Tp) == sizeof(int))
  2356		    return int {};
  2357		  else if constexpr (sizeof(_Tp) == sizeof(_LLong))
  2358		    return _LLong {};
  2359		}
  2360	      else
  2361		{
  2362		  if constexpr (sizeof(_Tp) == sizeof(_UChar))
  2363		    return _UChar {};
  2364		  else if constexpr (sizeof(_Tp) == sizeof(_UShort))
  2365		    return _UShort {};
  2366		  else if constexpr (sizeof(_Tp) == sizeof(_UInt))
  2367		    return _UInt {};
  2368		  else if constexpr (sizeof(_Tp) == sizeof(_ULLong))
  2369		    return _ULLong {};
  2370		}
  2371	    }
  2372	
  2373	    using type = typename __intrinsic_type_impl<decltype(__element_type())>::type;
  2374	  };
  2375	#endif // __ALTIVEC__
  2376	
  2377	// }}}
  2378	// _SimdWrapper<bool>{{{1
      	// _SimdWrapper specialization for bitmasks: _Width boolean lanes are stored
      	// as individual bits of _BuiltinType (the type selected by
      	// __bool_storage_member_type<_Width>), lane __i being bit __i.
      	// The specialization only exists where that storage type is defined.
  2379	template <size_t _Width>
  2380	  struct _SimdWrapper<bool, _Width,
  2381			      void_t<typename __bool_storage_member_type<_Width>::type>>
  2382	  {
  2383	    using _BuiltinType = typename __bool_storage_member_type<_Width>::type;
  2384	    using value_type = bool;
  2385	
      	    // number of bits the storage type can hold (may exceed _Width)
  2386	    static constexpr size_t _S_full_size = sizeof(_BuiltinType) * __CHAR_BIT__;
  2387	
      	    // same bits, viewed as a wrapper over all bits of the storage type
  2388	    _GLIBCXX_SIMD_INTRINSIC constexpr _SimdWrapper<bool, _S_full_size>
  2389	    __as_full_vector() const
  2390	    { return _M_data; }
  2391	
  2392	    _GLIBCXX_SIMD_INTRINSIC constexpr
  2393	    _SimdWrapper() = default;
  2394	
  2395	    _GLIBCXX_SIMD_INTRINSIC constexpr
  2396	    _SimdWrapper(_BuiltinType __k) : _M_data(__k) {}
  2397	
  2398	    _GLIBCXX_SIMD_INTRINSIC
  2399	    operator const _BuiltinType&() const
  2400	    { return _M_data; }
  2401	
  2402	    _GLIBCXX_SIMD_INTRINSIC
  2403	    operator _BuiltinType&()
  2404	    { return _M_data; }
  2405	
  2406	    _GLIBCXX_SIMD_INTRINSIC _BuiltinType
  2407	    __intrin() const
  2408	    { return _M_data; }
  2409	
      	    // true iff bit __i is set
  2410	    _GLIBCXX_SIMD_INTRINSIC constexpr value_type
  2411	    operator[](size_t __i) const
  2412	    { return _M_data & (_BuiltinType(1) << __i); }
  2413	
  2414	    template <size_t __i>
  2415	      _GLIBCXX_SIMD_INTRINSIC constexpr value_type
  2416	      operator[](_SizeConstant<__i>) const
  2417	      { return _M_data & (_BuiltinType(1) << __i); }
  2418	
      	    // sets (__x == true) or clears (__x == false) bit __i
  2419	    _GLIBCXX_SIMD_INTRINSIC constexpr void
  2420	    _M_set(size_t __i, value_type __x)
  2421	    {
  2422	      if (__x)
  2423		_M_data |= (_BuiltinType(1) << __i);
  2424	      else
  2425		_M_data &= ~(_BuiltinType(1) << __i);
  2426	    }
  2427	
      	    // true iff the compiler knows the mask value as a constant
  2428	    _GLIBCXX_SIMD_INTRINSIC constexpr bool
  2429	    _M_is_constprop() const
  2430	    { return __builtin_constant_p(_M_data); }
  2431	
      	    // true iff the mask is a known constant and none of the _Width active
      	    // bits is set; false if the value is unknown or any active bit is set
  2432	    _GLIBCXX_SIMD_INTRINSIC constexpr bool
  2433	    _M_is_constprop_none_of() const
  2434	    {
  2435	      if (__builtin_constant_p(_M_data))
  2436		{
      		  // The mask is built in unsigned long long. Writing
      		  // ~_BuiltinType() instead promotes __mmask8/__mmask16 to int,
      		  // the shift then acts on -1, and every bit of the storage type
      		  // (including padding bits above _Width) ends up in the mask.
  2437		  constexpr int __nbits = sizeof(unsigned long long) * __CHAR_BIT__;
  2438		  constexpr _BuiltinType __active_mask
  2439		    = _BuiltinType(~0ULL >> (__nbits - _Width));
  2440		  return (_M_data & __active_mask) == 0;
  2441		}
  2442	      return false;
  2443	    }
  2444	
      	    // true iff the mask is a known constant and all of the _Width active
      	    // bits are set; false if the value is unknown or any active bit is clear
  2445	    _GLIBCXX_SIMD_INTRINSIC constexpr bool
  2446	    _M_is_constprop_all_of() const
  2447	    {
  2448	      if (__builtin_constant_p(_M_data))
  2449		{
      		  // see _M_is_constprop_none_of for why unsigned long long is used
  2450		  constexpr int __nbits = sizeof(unsigned long long) * __CHAR_BIT__;
  2451		  constexpr _BuiltinType __active_mask
  2452		    = _BuiltinType(~0ULL >> (__nbits - _Width));
  2453		  return (_M_data & __active_mask) == __active_mask;
  2454		}
  2455	      return false;
  2456	    }
  2457	
  2458	    _BuiltinType _M_data;
  2459	  };
  2460	
  2461	// _SimdWrapperBase{{{1
      	// Base class of _SimdWrapper that owns the builtin vector (_M_data).
      	// The bool parameter selects how the default constructor treats _M_data:
      	//   false - defaulted constructor, _M_data is not explicitly initialized
      	//   true  - _M_data is value-initialized
      	// Both variants offer the same converting constructor from _BuiltinType.
  2462	template <bool _MustZeroInitPadding, typename _BuiltinType>
  2463	  struct _SimdWrapperBase;
  2464	
  2465	template <typename _BuiltinType>
  2466	  struct _SimdWrapperBase<false, _BuiltinType> // no padding or no SNaNs
  2467	  {
  2468	    _GLIBCXX_SIMD_INTRINSIC constexpr
  2469	    _SimdWrapperBase() = default;
  2470	
  2471	    _GLIBCXX_SIMD_INTRINSIC constexpr
  2472	    _SimdWrapperBase(_BuiltinType __init) : _M_data(__init) {}
  2473	
  2474	    _BuiltinType _M_data;
  2475	  };
  2476	
  2477	template <typename _BuiltinType>
  2478	  struct _SimdWrapperBase<true, _BuiltinType> // with padding that needs to
  2479						      // never become SNaN
  2480	  {
      	    // _M_data() value-initializes the whole vector
  2481	    _GLIBCXX_SIMD_INTRINSIC constexpr
  2482	    _SimdWrapperBase() : _M_data() {}
  2483	
  2484	    _GLIBCXX_SIMD_INTRINSIC constexpr
  2485	    _SimdWrapperBase(_BuiltinType __init) : _M_data(__init) {}
  2486	
  2487	    _BuiltinType _M_data;
  2488	  };
  2489	
  2490	// }}}
  2491	// _SimdWrapper{{{
  2492	template <typename _Tp, size_t _Width>
  2493	  struct _SimdWrapper<
  2494	    _Tp, _Width,
  2495	    void_t<__vector_type_t<_Tp, _Width>, __intrinsic_type_t<_Tp, _Width>>>
  2496	    : _SimdWrapperBase<__has_iec559_behavior<__signaling_NaN, _Tp>::value
  2497				 && sizeof(_Tp) * _Width
  2498				      == sizeof(__vector_type_t<_Tp, _Width>),
  2499			       __vector_type_t<_Tp, _Width>>
  2500	  {
  2501	    using _Base
  2502	      = _SimdWrapperBase<__has_iec559_behavior<__signaling_NaN, _Tp>::value
  2503				   && sizeof(_Tp) * _Width
  2504					== sizeof(__vector_type_t<_Tp, _Width>),
  2505				 __vector_type_t<_Tp, _Width>>;
  2506	
  2507	    static_assert(__is_vectorizable_v<_Tp>);
  2508	    static_assert(_Width >= 2); // 1 doesn't make sense, use _Tp directly then
  2509	
  2510	    using _BuiltinType = __vector_type_t<_Tp, _Width>;
  2511	    using value_type = _Tp;
  2512	
  2513	    static inline constexpr size_t _S_full_size
  2514	      = sizeof(_BuiltinType) / sizeof(value_type);
  2515	    static inline constexpr int _S_size = _Width;
  2516	    static inline constexpr bool _S_is_partial = _S_full_size != _S_size;
  2517	
  2518	    using _Base::_M_data;
  2519	
  2520	    _GLIBCXX_SIMD_INTRINSIC constexpr _SimdWrapper<_Tp, _S_full_size>
  2521	    __as_full_vector() const
  2522	    { return _M_data; }
  2523	
  2524	    _GLIBCXX_SIMD_INTRINSIC constexpr
  2525	    _SimdWrapper(initializer_list<_Tp> __init)
  2526	    : _Base(__generate_from_n_evaluations<_Width, _BuiltinType>(
  2527		      [&](auto __i) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
  2528			return __init.begin()[__i.value];
  2529		      })) {}
  2530	
  2531	    _GLIBCXX_SIMD_INTRINSIC constexpr
  2532	    _SimdWrapper() = default;
  2533	
  2534	    _GLIBCXX_SIMD_INTRINSIC constexpr
  2535	    _SimdWrapper(const _SimdWrapper&) = default;
  2536	
  2537	    _GLIBCXX_SIMD_INTRINSIC constexpr
  2538	    _SimdWrapper(_SimdWrapper&&) = default;
  2539	
  2540	    _GLIBCXX_SIMD_INTRINSIC constexpr _SimdWrapper&
  2541	    operator=(const _SimdWrapper&) = default;
  2542	
  2543	    _GLIBCXX_SIMD_INTRINSIC constexpr _SimdWrapper&
  2544	    operator=(_SimdWrapper&&) = default;
  2545	
  2546	    template <typename _V, typename = enable_if_t<disjunction_v<
  2547				     is_same<_V, __vector_type_t<_Tp, _Width>>,
  2548				     is_same<_V, __intrinsic_type_t<_Tp, _Width>>>>>
  2549	      _GLIBCXX_SIMD_INTRINSIC constexpr
  2550	      _SimdWrapper(_V __x)
  2551	      // __vector_bitcast can convert e.g. __m128 to __vector(2) float
  2552	      : _Base(__vector_bitcast<_Tp, _Width>(__x)) {}
  2553	
  2554	    template <typename... _As,
  2555		      typename = enable_if_t<((is_same_v<simd_abi::scalar, _As> && ...)
  2556					      && sizeof...(_As) <= _Width)>>
  2557	      _GLIBCXX_SIMD_INTRINSIC constexpr
  2558	      operator _SimdTuple<_Tp, _As...>() const
  2559	      {
  2560		return __generate_from_n_evaluations<sizeof...(_As), _SimdTuple<_Tp, _As...>>(
  2561			 [&](auto __i) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA
  2562			 { return _M_data[int(__i)]; });
  2563	      }
  2564	
  2565	    _GLIBCXX_SIMD_INTRINSIC constexpr
  2566	    operator const _BuiltinType&() const
  2567	    { return _M_data; }
  2568	
  2569	    _GLIBCXX_SIMD_INTRINSIC constexpr
  2570	    operator _BuiltinType&()
  2571	    { return _M_data; }
  2572	
  2573	    _GLIBCXX_SIMD_INTRINSIC constexpr _Tp
  2574	    operator[](size_t __i) const
  2575	    { return _M_data[__i]; }
  2576	
  2577	    template <size_t __i>
  2578	      _GLIBCXX_SIMD_INTRINSIC constexpr _Tp
  2579	      operator[](_SizeConstant<__i>) const
  2580	      { return _M_data[__i]; }
  2581	
  2582	    _GLIBCXX_SIMD_INTRINSIC constexpr void
  2583	    _M_set(size_t __i, _Tp __x)
  2584	    {
  2585	      if (__builtin_is_constant_evaluated())
  2586		_M_data = __generate_from_n_evaluations<_Width, _BuiltinType>([&](auto __j) {
  2587			    return __j == __i ? __x : _M_data[__j()];
  2588			  });
  2589	      else
  2590		_M_data[__i] = __x;
  2591	    }
  2592	
      	    // Reports whether the compiler considers _M_data a compile-time constant
      	    // at this point (forwards the result of __builtin_constant_p).
  2593	    _GLIBCXX_SIMD_INTRINSIC
  2594	    constexpr bool
  2595	    _M_is_constprop() const
  2596	    { return __builtin_constant_p(_M_data); }
  2597	
      	    // Conservative "all elements are zero" test for constant propagation.
      	    // Returns true only if __builtin_constant_p accepts both _M_data and the
      	    // folded result, and every element compares equal to zero. Floating-point
      	    // elements are compared through their same-sized integer bit pattern.
      	    // In every other situation the result is false, so false does NOT imply
      	    // that some element is nonzero.
  2598	    _GLIBCXX_SIMD_INTRINSIC constexpr bool
  2599	    _M_is_constprop_none_of() const
  2600	    {
  2601	      if (__builtin_constant_p(_M_data))
  2602		{
  2603		  bool __r = true;
  2604		  if constexpr (is_floating_point_v<_Tp>)
  2605		    {
  2606		      using _Ip = __int_for_sizeof_t<_Tp>;
  2607		      const auto __intdata = __vector_bitcast<_Ip>(_M_data);
  2608		      __execute_n_times<_Width>(
  2609			[&](auto __i) { __r &= __intdata[__i.value] == _Ip(); });
  2610		    }
  2611		  else
  2612		    __execute_n_times<_Width>(
  2613		      [&](auto __i) { __r &= _M_data[__i.value] == _Tp(); });
      		  // only trust the accumulated result if it folded to a constant
  2614		  if (__builtin_constant_p(__r))
  2615		    return __r;
  2616		}
  2617	      return false;
  2618	    }
  2619	
  2620	    _GLIBCXX_SIMD_INTRINSIC constexpr bool
  2621	    _M_is_constprop_all_of() const
  2622	    {
  2623	      if (__builtin_constant_p(_M_data))
  2624		{
  2625		  bool __r = true;
  2626		  if constexpr (is_floating_point_v<_Tp>)
  2627		    {
  2628		      using _Ip = __int_for_sizeof_t<_Tp>;
  2629		      const auto __intdata = __vector_bitcast<_Ip>(_M_data);
  2630		      __execute_n_times<_Width>(
  2631			[&](auto __i) { __r &= __intdata[__i.value] == ~_Ip(); });
  2632		    }
  2633		  else
  2634		    __execute_n_times<_Width>(
  2635		      [&](auto __i) { __r &= _M_data[__i.value] == ~_Tp(); });
  2636		  if (__builtin_constant_p(__r))
  2637		    return __r;
  2638		}
  2639	      return false;
  2640	    }
  2641	  };
  2642	
  2643	// }}}
  2644	
  2645	// __vectorized_sizeof {{{
      	// Vector width in bytes that is selected for element type _Tp, derived from
      	// the __have_* / __support_* target feature constants.
      	//   - 0 if _Tp is not vectorizable;
      	//   - 64, 32 or 16 when one of the feature checks below matches;
      	//   - sizeof(_Tp) if no check matches or if _Tp is wider than 8 bytes.
      	template <typename _Tp>
      	  constexpr size_t
      	  __vectorized_sizeof()
      	  {
      	    if constexpr (!__is_vectorizable_v<_Tp>)
      	      return 0;
      	
      	    if constexpr (sizeof(_Tp) <= 8)
      	      {
      		// X86, widest candidate first:
      		constexpr bool __use_64
      		  = __have_avx512bw || (__have_avx512f && sizeof(_Tp) >= 4);
      		constexpr bool __use_32
      		  = __have_avx2 || (__have_avx && is_floating_point_v<_Tp>);
      		constexpr bool __use_16_x86
      		  = __have_sse2 || (__have_sse && is_same_v<_Tp, float>);
      	
      		if constexpr (__use_64)
      		  return 64;
      		if constexpr (__use_32)
      		  return 32;
      		if constexpr (__use_16_x86)
      		  return 16;
      		/* MMX is intentionally not offered: mixing MMX and x87 code is too
      		 * much trouble. While nothing here explicitly calls MMX instructions
      		 * or registers, they are still emitted but no EMMS cleanup is done.
      		if constexpr (__have_mmx && sizeof(_Tp) <= 4 && is_integral_v<_Tp>)
      		  return 8;
      		 */
      	
      		// PowerPC:
      		constexpr bool __use_16_power
      		  = __have_power8vec
      		      || (__have_power_vmx && (sizeof(_Tp) < 8))
      		      || (__have_power_vsx && is_floating_point_v<_Tp>);
      	
      		// ARM. Plain NEON (ARMv7) floating-point is not IEC559 conforming, so
      		// it is only used if the user allows non-IEC559 fp (e.g. via
      		// -ffast-math), which is what __support_neon_float expresses.
      		constexpr bool __use_16_arm
      		  = __have_neon_a64
      		      || (__have_neon_a32 && !is_same_v<_Tp, double>)
      		      || (__have_neon
      			    && sizeof(_Tp) < 8
      			    && (__support_neon_float || !is_floating_point_v<_Tp>));
      	
      		if constexpr (__use_16_power || __use_16_arm)
      		  return 16;
      	      }
      	
      	    return sizeof(_Tp);
      	  }
  2696	
  2697	// }}}
  2698	namespace simd_abi {
  2699	// most of simd_abi is defined in simd_detail.h
      	// Per element type: 64 if AVX512BW is available and _Tp is one byte wide,
      	// otherwise 32.
  2700	template <typename _Tp>
  2701	  inline constexpr int max_fixed_size
  2702	    = (__have_avx512bw && sizeof(_Tp) == 1) ? 64 : 32;
  2703	
  2704	// compatible {{{
      	// simd_abi::compatible<_Tp> is chosen by the preprocessor per target:
      	//  - x86_64 and AArch64: _VecBuiltin<16> for element types of up to 8 bytes,
      	//    scalar for anything larger;
      	//  - other targets that define __ARM_NEON: _VecBuiltin<16> for element types
      	//    smaller than 8 bytes (floating-point only if __support_neon_float is
      	//    set), scalar otherwise;
      	//  - all remaining targets: scalar.
  2705	#if defined __x86_64__ || defined __aarch64__
  2706	template <typename _Tp>
  2707	  using compatible = conditional_t<(sizeof(_Tp) <= 8), _VecBuiltin<16>, scalar>;
  2708	#elif defined __ARM_NEON
  2709	// FIXME: not sure, probably needs to be scalar (or dependent on the hard-float
  2710	// ABI?)
  2711	template <typename _Tp>
  2712	  using compatible
  2713	    = conditional_t<(sizeof(_Tp) < 8
  2714			     && (__support_neon_float || !is_floating_point_v<_Tp>)),
  2715			    _VecBuiltin<16>, scalar>;
  2716	#else
  2717	template <typename>
  2718	  using compatible = scalar;
  2719	#endif
  2720	
  2721	// }}}
  2722	// native {{{
      	// __determine_native_abi returns a null pointer; only its pointee type
      	// matters, which names the ABI tag picked for _Tp:
      	//  - scalar if __vectorized_sizeof<_Tp>() equals sizeof(_Tp);
      	//  - _VecBltnBtmsk<bytes> with AVX512VL, or with AVX512F and 64-byte vectors;
      	//  - _VecBuiltin<bytes> otherwise.
      	// The branches return different pointer types, hence the if-constexpr chain
      	// together with the deduced return type.
  2723	template <typename _Tp>
  2724	  constexpr auto
  2725	  __determine_native_abi()
  2726	  {
  2727	    constexpr size_t __bytes = __vectorized_sizeof<_Tp>();
  2728	    if constexpr (__bytes == sizeof(_Tp))
  2729	      return static_cast<scalar*>(nullptr);
  2730	    else if constexpr (__have_avx512vl || (__have_avx512f && __bytes == 64))
  2731	      return static_cast<_VecBltnBtmsk<__bytes>*>(nullptr);
  2732	    else
  2733	      return static_cast<_VecBuiltin<__bytes>*>(nullptr);
  2734	  }
  2735	
      	// simd_abi::native<_Tp>: the pointee type of __determine_native_abi<_Tp>().
      	// The alias is only usable for vectorizable _Tp (enable_if_t default
      	// argument).
  2736	template <typename _Tp, typename = enable_if_t<__is_vectorizable_v<_Tp>>>
  2737	  using native = remove_pointer_t<decltype(__determine_native_abi<_Tp>())>;
  2738	
  2739	// }}}
  2740	// __default_abi {{{
      	// ABI tag used as the default template argument of simd, simd_mask,
      	// simd_size and simd_size_v in this file. If the macro
      	// _GLIBCXX_SIMD_DEFAULT_ABI is defined it is used as a template over the
      	// element type; otherwise the default is compatible<_Tp>.
  2741	#if defined _GLIBCXX_SIMD_DEFAULT_ABI
  2742	template <typename _Tp>
  2743	  using __default_abi = _GLIBCXX_SIMD_DEFAULT_ABI<_Tp>;
  2744	#else
  2745	template <typename _Tp>
  2746	  using __default_abi = compatible<_Tp>;
  2747	#endif
  2748	
  2749	// }}}
  2750	} // namespace simd_abi
  2751	
  2752	// traits {{{1
  2753	template <typename _Tp>
  2754	  struct is_simd_flag_type
  2755	  : false_type
  2756	  {};
  2757	
  2758	template <>
  2759	  struct is_simd_flag_type<element_aligned_tag>
  2760	  : true_type
  2761	  {};
  2762	
  2763	template <>
  2764	  struct is_simd_flag_type<vector_aligned_tag>
  2765	  : true_type
  2766	  {};
  2767	
  2768	template <size_t _Np>
  2769	  struct is_simd_flag_type<overaligned_tag<_Np>>
  2770	  : __bool_constant<(_Np > 0) and __has_single_bit(_Np)>
  2771	  {};
  2772	
  2773	template <typename _Tp>
  2774	  inline constexpr bool is_simd_flag_type_v = is_simd_flag_type<_Tp>::value;
  2775	
  2776	template <typename _Tp, typename = enable_if_t<is_simd_flag_type_v<_Tp>>>
  2777	  using _IsSimdFlagType = _Tp;
  2778	
  2779	// is_abi_tag {{{2
  2780	template <typename _Tp, typename = void_t<>>
  2781	  struct is_abi_tag : false_type {};
  2782	
  2783	template <typename _Tp>
  2784	  struct is_abi_tag<_Tp, void_t<typename _Tp::_IsValidAbiTag>>
  2785	  : public _Tp::_IsValidAbiTag {};
  2786	
  2787	template <typename _Tp>
  2788	  inline constexpr bool is_abi_tag_v = is_abi_tag<_Tp>::value;
  2789	
  2790	// is_simd(_mask) {{{2
  2791	template <typename _Tp>
  2792	  struct is_simd : public false_type {};
  2793	
  2794	template <typename _Tp>
  2795	  inline constexpr bool is_simd_v = is_simd<_Tp>::value;
  2796	
  2797	template <typename _Tp>
  2798	  struct is_simd_mask : public false_type {};
  2799	
  2800	template <typename _Tp>
  2801	inline constexpr bool is_simd_mask_v = is_simd_mask<_Tp>::value;
  2802	
  2803	// simd_size {{{2
      	// __simd_size_impl is an empty class unless _Tp is vectorizable and _Abi is
      	// an ABI tag; only then does it derive from
      	// _SizeConstant<_Abi::_S_size<_Tp>>. Consequently simd_size<_Tp, _Abi> has a
      	// `value` member only for such combinations.
  2804	template <typename _Tp, typename _Abi, typename = void>
  2805	  struct __simd_size_impl {};
  2806	
  2807	template <typename _Tp, typename _Abi>
  2808	  struct __simd_size_impl<
  2809	    _Tp, _Abi,
  2810	    enable_if_t<conjunction_v<__is_vectorizable<_Tp>, is_abi_tag<_Abi>>>>
  2811	    : _SizeConstant<_Abi::template _S_size<_Tp>> {};
  2812	
      	// Public trait; _Abi defaults to simd_abi::__default_abi<_Tp>.
  2813	template <typename _Tp, typename _Abi = simd_abi::__default_abi<_Tp>>
  2814	  struct simd_size : __simd_size_impl<_Tp, _Abi> {};
  2815	
  2816	template <typename _Tp, typename _Abi = simd_abi::__default_abi<_Tp>>
  2817	  inline constexpr size_t simd_size_v = simd_size<_Tp, _Abi>::value;
  2818	
  2819	// simd_abi::deduce {{{2
      	// Declared here, defined elsewhere; simd_abi::deduce derives from it.
  2820	template <typename _Tp, size_t _Np, typename = void>
  2821	  struct __deduce_impl;
  2822	
  2823	namespace simd_abi {
  2824	/**
  2825	 * @tparam _Tp   The requested `value_type` for the elements.
  2826	 * @tparam _Np   The requested number of elements.
  2827	 * @tparam _Abis This parameter is ignored, since this implementation cannot
  2828	 * make any use of it. Either a good native ABI is matched and used as `type`
  2829	 * alias, or the `fixed_size<_Np>` ABI is used, which internally is built from
  2830	 * the best matching native ABIs.
  2831	 */
  2832	template <typename _Tp, size_t _Np, typename...>
  2833	  struct deduce : __deduce_impl<_Tp, _Np> {};
  2834	
      	// Shorthand for deduce<...>::type; the _Abis pack is forwarded to deduce.
  2835	template <typename _Tp, size_t _Np, typename... _Abis>
  2836	  using deduce_t = typename deduce<_Tp, _Np, _Abis...>::type;
  2837	} // namespace simd_abi
  2838	
  2839	// }}}2
  2840	// rebind_simd {{{2
      	// rebind_simd<_Tp, V>::type: for V = simd<_Up, _Abi> or simd_mask<_Up, _Abi>
      	// the same kind of type with element type _Tp, the element count of V and
      	// an ABI chosen by simd_abi::deduce_t. The primary template is only
      	// declared, and the specializations are viable only if deduce_t names a
      	// type (void_t guard), so other uses have no `type` member.
  2841	template <typename _Tp, typename _V, typename = void>
  2842	  struct rebind_simd;
  2843	
  2844	template <typename _Tp, typename _Up, typename _Abi>
  2845	  struct rebind_simd<_Tp, simd<_Up, _Abi>,
  2846			     void_t<simd_abi::deduce_t<_Tp, simd_size_v<_Up, _Abi>, _Abi>>>
  2847	  { using type = simd<_Tp, simd_abi::deduce_t<_Tp, simd_size_v<_Up, _Abi>, _Abi>>; };
  2848	
  2849	template <typename _Tp, typename _Up, typename _Abi>
  2850	  struct rebind_simd<_Tp, simd_mask<_Up, _Abi>,
  2851			     void_t<simd_abi::deduce_t<_Tp, simd_size_v<_Up, _Abi>, _Abi>>>
  2852	  { using type = simd_mask<_Tp, simd_abi::deduce_t<_Tp, simd_size_v<_Up, _Abi>, _Abi>>; };
  2853	
  2854	template <typename _Tp, typename _V>
  2855	  using rebind_simd_t = typename rebind_simd<_Tp, _V>::type;
  2856	
  2857	// resize_simd {{{2
      	// resize_simd<_Np, V>::type: for V = simd<_Tp, _Abi> or simd_mask<_Tp, _Abi>
      	// the same kind of type with the same element type, _Np elements and an ABI
      	// chosen by simd_abi::deduce_t. The primary template is only declared, and
      	// the specializations are viable only if deduce_t names a type.
  2858	template <int _Np, typename _V, typename = void>
  2859	  struct resize_simd;
  2860	
  2861	template <int _Np, typename _Tp, typename _Abi>
  2862	  struct resize_simd<_Np, simd<_Tp, _Abi>, void_t<simd_abi::deduce_t<_Tp, _Np, _Abi>>>
  2863	  { using type = simd<_Tp, simd_abi::deduce_t<_Tp, _Np, _Abi>>; };
  2864	
  2865	template <int _Np, typename _Tp, typename _Abi>
  2866	  struct resize_simd<_Np, simd_mask<_Tp, _Abi>, void_t<simd_abi::deduce_t<_Tp, _Np, _Abi>>>
  2867	  { using type = simd_mask<_Tp, simd_abi::deduce_t<_Tp, _Np, _Abi>>; };
  2868	
  2869	template <int _Np, typename _V>
  2870	  using resize_simd_t = typename resize_simd<_Np, _V>::type;
  2871	
  2872	// }}}2
  2873	// memory_alignment {{{2
      	// memory_alignment<_Tp, _Up>::value is vector_aligned_tag::_S_alignment<_Tp,
      	// _Up>. _Up defaults to _Tp::value_type, so _Tp must provide that member
      	// when _Up is not given explicitly.
  2874	template <typename _Tp, typename _Up = typename _Tp::value_type>
  2875	  struct memory_alignment
  2876	  : public _SizeConstant<vector_aligned_tag::_S_alignment<_Tp, _Up>> {};
  2877	
  2878	template <typename _Tp, typename _Up = typename _Tp::value_type>
  2879	  inline constexpr size_t memory_alignment_v = memory_alignment<_Tp, _Up>::value;
  2880	
  2881	// class template simd [simd] {{{1
      	// Declaration only; the ABI defaults to simd_abi::__default_abi<_Tp>.
  2882	template <typename _Tp, typename _Abi = simd_abi::__default_abi<_Tp>>
  2883	  class simd;
  2884	
      	// Every simd<_Tp, _Abi> satisfies is_simd.
  2885	template <typename _Tp, typename _Abi>
  2886	  struct is_simd<simd<_Tp, _Abi>> : public true_type {};
  2887	
      	// simd with the simd_abi::native<_Tp> ABI.
  2888	template <typename _Tp>
  2889	  using native_simd = simd<_Tp, simd_abi::native<_Tp>>;
  2890	
      	// simd with the simd_abi::fixed_size<_Np> ABI.
  2891	template <typename _Tp, int _Np>
  2892	  using fixed_size_simd = simd<_Tp, simd_abi::fixed_size<_Np>>;
  2893	
      	// simd with the ABI that simd_abi::deduce_t selects for _Np elements.
  2894	template <typename _Tp, size_t _Np>
  2895	  using __deduced_simd = simd<_Tp, simd_abi::deduce_t<_Tp, _Np>>;
  2896	
  2897	// class template simd_mask [simd_mask] {{{1
      	// Declaration only; the ABI defaults to simd_abi::__default_abi<_Tp>.
  2898	template <typename _Tp, typename _Abi = simd_abi::__default_abi<_Tp>>
  2899	  class simd_mask;
  2900	
      	// Every simd_mask<_Tp, _Abi> satisfies is_simd_mask.
  2901	template <typename _Tp, typename _Abi>
  2902	  struct is_simd_mask<simd_mask<_Tp, _Abi>> : public true_type {};
  2903	
      	// simd_mask with the simd_abi::native<_Tp> ABI.
  2904	template <typename _Tp>
  2905	  using native_simd_mask = simd_mask<_Tp, simd_abi::native<_Tp>>;
  2906	
      	// simd_mask with the simd_abi::fixed_size<_Np> ABI.
  2907	template <typename _Tp, int _Np>
  2908	  using fixed_size_simd_mask = simd_mask<_Tp, simd_abi::fixed_size<_Np>>;
  2909	
      	// simd_mask with the ABI that simd_abi::deduce_t selects for _Np elements.
  2910	template <typename _Tp, size_t _Np>
  2911	  using __deduced_simd_mask = simd_mask<_Tp, simd_abi::deduce_t<_Tp, _Np>>;
  2912	
  2913	// casts [simd.casts] {{{1
  2914	// static_simd_cast {{{2
      	// __static_simd_cast_return_type<_Tp, _Up, _Ap>::type is the result type of
      	// static_simd_cast<_Tp>(simd<_Up, _Ap>). The fourth parameter records
      	// whether _Tp is itself a simd; the primary template is only declared.
  2915	template <typename _Tp, typename _Up, typename _Ap, bool = is_simd_v<_Tp>, typename = void>
  2916	  struct __static_simd_cast_return_type;
  2917	
      	// Target given as simd_mask<_Tp, _A0>: handled like target simd<_Tp, _A0>.
  2918	template <typename _Tp, typename _A0, typename _Up, typename _Ap>
  2919	  struct __static_simd_cast_return_type<simd_mask<_Tp, _A0>, _Up, _Ap, false, void>
  2920	  : __static_simd_cast_return_type<simd<_Tp, _A0>, _Up, _Ap> {};
  2921	
      	// Target is a simd type with the same element count as the source: the
      	// result is that target type. No `type` exists if the sizes differ.
  2922	template <typename _Tp, typename _Up, typename _Ap>
  2923	  struct __static_simd_cast_return_type<
  2924	    _Tp, _Up, _Ap, true, enable_if_t<_Tp::size() == simd_size_v<_Up, _Ap>>>
  2925	  { using type = _Tp; };
  2926	
      	// Target element type equals the source element type: keep the source ABI.
      	// With _GLIBCXX_SIMD_FIX_P2TS_ISSUE66 defined, this additionally requires
      	// _Tp to be vectorizable.
  2927	template <typename _Tp, typename _Ap>
  2928	  struct __static_simd_cast_return_type<_Tp, _Tp, _Ap, false,
  2929	#ifdef _GLIBCXX_SIMD_FIX_P2TS_ISSUE66
  2930						enable_if_t<__is_vectorizable_v<_Tp>>
  2931	#else
  2932						void
  2933	#endif
  2934						>
  2935	  { using type = simd<_Tp, _Ap>; };
  2936	
      	// __safe_make_signed<_Tp>::type: the signed counterpart of an integral _Tp;
      	// any non-integral _Tp is passed through unchanged (plain make_signed_t
      	// would not accept it).
  2937	template <typename _Tp, typename = void>
  2938	  struct __safe_make_signed { using type = _Tp;};
  2939	
  2940	template <typename _Tp>
  2941	  struct __safe_make_signed<_Tp, enable_if_t<is_integral_v<_Tp>>>
  2942	  {
  2943	    // the extra make_unsigned_t is because of PR85951
  2944	    using type = make_signed_t<make_unsigned_t<_Tp>>;
  2945	  };
  2946	
  2947	template <typename _Tp>
  2948	  using safe_make_signed_t = typename __safe_make_signed<_Tp>::type;
  2949	
      	// General case, target _Tp is an element type (not a simd):
      	//  - simd<_Tp, _Ap> (source ABI kept) if _Tp and _Up are both integral and
      	//    are the same type once both are made signed; unless
      	//    _GLIBCXX_SIMD_FIX_P2TS_ISSUE65 is defined, they must additionally
      	//    differ in signedness;
      	//  - fixed_size_simd<_Tp, N> with N the source element count otherwise.
      	// With _GLIBCXX_SIMD_FIX_P2TS_ISSUE66 defined, _Tp must be vectorizable.
  2950	template <typename _Tp, typename _Up, typename _Ap>
  2951	  struct __static_simd_cast_return_type<_Tp, _Up, _Ap, false,
  2952	#ifdef _GLIBCXX_SIMD_FIX_P2TS_ISSUE66
  2953						enable_if_t<__is_vectorizable_v<_Tp>>
  2954	#else
  2955						void
  2956	#endif
  2957						>
  2958	  {
  2959	    using type = conditional_t<
  2960	      (is_integral_v<_Up> && is_integral_v<_Tp> &&
  2961	#ifndef _GLIBCXX_SIMD_FIX_P2TS_ISSUE65
  2962	       is_signed_v<_Up> != is_signed_v<_Tp> &&
  2963	#endif
  2964	       is_same_v<safe_make_signed_t<_Up>, safe_make_signed_t<_Tp>>),
  2965	      simd<_Tp, _Ap>, fixed_size_simd<_Tp, simd_size_v<_Up, _Ap>>>;
  2966	  };
  2967	
  2968	template <typename _Tp, typename _Up, typename _Ap,
  2969		  typename _R
  2970		  = typename __static_simd_cast_return_type<_Tp, _Up, _Ap>::type>
  2971	  _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR _R
  2972	  static_simd_cast(const simd<_Up, _Ap>& __x)
  2973	  {
  2974	    if constexpr (is_same<_R, simd<_Up, _Ap>>::value)
  2975	      return __x;
  2976	    else
  2977	      {
  2978		_SimdConverter<_Up, _Ap, typename _R::value_type, typename _R::abi_type>
  2979		  __c;
  2980		return _R(__private_init, __c(__data(__x)));
  2981	      }
  2982	  }
  2983	
  2984	namespace __proposed {
      	// Mask overload: the result is the mask_type of the simd type _R that
      	// __static_simd_cast_return_type computes for target _Tp. Its value comes
      	// from the result ABI's _MaskImpl::_S_convert, instantiated for the result
      	// element type and applied to __x.
  2985	template <typename _Tp, typename _Up, typename _Ap,
  2986		  typename _R
  2987		  = typename __static_simd_cast_return_type<_Tp, _Up, _Ap>::type>
  2988	  _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR typename _R::mask_type
  2989	  static_simd_cast(const simd_mask<_Up, _Ap>& __x)
  2990	  {
  2991	    using _RM = typename _R::mask_type;
  2992	    return {__private_init, _RM::abi_type::_MaskImpl::template _S_convert<
  2993				      typename _RM::simd_type::value_type>(__x)};
  2994	  }
  2995	} // namespace __proposed
  2996	
  2997	// simd_cast {{{2
      	// Forwards to static_simd_cast<_Tp>(__x); return type and viability follow
      	// from that call (trailing decltype). The parameter is spelled
      	// simd<_ValuePreserving<_Up, _To>, _Ap>, with _To being
      	// __value_type_or_identity_t<_Tp>.
      	// NOTE(review): _ValuePreserving is defined elsewhere; presumably it removes
      	// this overload when converting _Up to _To could lose information - confirm.
  2998	template <typename _Tp, typename _Up, typename _Ap,
  2999		  typename _To = __value_type_or_identity_t<_Tp>>
  3000	  _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR auto
  3001	  simd_cast(const simd<_ValuePreserving<_Up, _To>, _Ap>& __x)
  3002	    -> decltype(static_simd_cast<_Tp>(__x))
  3003	  { return static_simd_cast<_Tp>(__x); }
  3004	
  3005	namespace __proposed {
      	// Same as above for simd_mask arguments.
  3006	template <typename _Tp, typename _Up, typename _Ap,
  3007		  typename _To = __value_type_or_identity_t<_Tp>>
  3008	  _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR auto
  3009	  simd_cast(const simd_mask<_ValuePreserving<_Up, _To>, _Ap>& __x)
  3010	    -> decltype(static_simd_cast<_Tp>(__x))
  3011	  { return static_simd_cast<_Tp>(__x); }
  3012	} // namespace __proposed
  3013	
  3014	// }}}2
  3015	// resizing_simd_cast {{{
  3016	namespace __proposed {
  3017	/* Proposed spec:
  3018	
  3019	template <class T, class U, class Abi>
  3020	T resizing_simd_cast(const simd<U, Abi>& x)
  3021	
  3022	p1  Constraints:
  3023	    - is_simd_v<T> is true and
  3024	    - T::value_type is the same type as U
  3025	
  3026	p2  Returns:
  3027	    A simd object with the i^th element initialized to x[i] for all i in the
  3028	    range of [0, min(T::size(), simd_size_v<U, Abi>)). If T::size() is larger
  3029	    than simd_size_v<U, Abi>, the remaining elements are value-initialized.
  3030	
  3031	template <class T, class U, class Abi>
  3032	T resizing_simd_cast(const simd_mask<U, Abi>& x)
  3033	
  3034	p1  Constraints: is_simd_mask_v<T> is true
  3035	
  3036	p2  Returns:
  3037	    A simd_mask object with the i^th element initialized to x[i] for all i in
  3038	the range of [0, min(T::size(), simd_size_v<U, Abi>)). If T::size() is larger
  3039	    than simd_size_v<U, Abi>, the remaining elements are initialized to false.
  3040	
  3041	 */
  3042	
      	// simd overload; participates only if _Tp is a simd whose value_type is _Up.
      	// The branches are tried in this order:
      	//  1. same ABI: __x is returned unchanged;
      	//  2. constant evaluation: element-wise generator, elements past the source
      	//     size become _Up();
      	//  3. one-element source: value-initialized result with element 0 set;
      	//  4. one-element target: built from __x[0];
      	//  5. equal object size and a source ABI that is not fixed_size: bitcast of
      	//     the result of _Ap::_S_masked applied to the source data;
      	//  6. otherwise: value-initialized result, then the first
      	//     min(target size, source size) elements are copied with memcpy.
  3043	template <typename _Tp, typename _Up, typename _Ap>
  3044	  _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR enable_if_t<
  3045	  conjunction_v<is_simd<_Tp>, is_same<typename _Tp::value_type, _Up>>, _Tp>
  3046	  resizing_simd_cast(const simd<_Up, _Ap>& __x)
  3047	  {
  3048	    if constexpr (is_same_v<typename _Tp::abi_type, _Ap>)
  3049	      return __x;
  3050	    else if (__builtin_is_constant_evaluated())
  3051	      return _Tp([&](auto __i) constexpr {
  3052		       return __i < simd_size_v<_Up, _Ap> ? __x[__i] : _Up();
  3053		     });
  3054	    else if constexpr (simd_size_v<_Up, _Ap> == 1)
  3055	      {
  3056		_Tp __r{};
  3057		__r[0] = __x[0];
  3058		return __r;
  3059	      }
  3060	    else if constexpr (_Tp::size() == 1)
  3061	      return __x[0];
  3062	    else if constexpr (sizeof(_Tp) == sizeof(__x)
  3063			       && !__is_fixed_size_abi_v<_Ap>)
      	      // NOTE(review): _S_masked presumably zeroes padding elements of a
      	      // partially used register before the bits are reinterpreted - confirm.
  3064	      return {__private_init,
  3065		      __vector_bitcast<typename _Tp::value_type, _Tp::size()>(
  3066			_Ap::_S_masked(__data(__x))._M_data)};
  3067	    else
  3068	      {
  3069		_Tp __r{};
  3070		__builtin_memcpy(&__data(__r), &__data(__x),
  3071				 sizeof(_Up)
  3072				   * std::min(_Tp::size(), simd_size_v<_Up, _Ap>));
  3073		return __r;
  3074	      }
  3075	  }
  3076	
      	// simd_mask overload; participates only if _Tp is a simd_mask. The result is
      	// built from the target ABI's _MaskImpl::_S_convert, instantiated for the
      	// target's element type and applied to __x.
  3077	template <typename _Tp, typename _Up, typename _Ap>
  3078	  _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR
  3079	  enable_if_t<is_simd_mask_v<_Tp>, _Tp>
  3080	  resizing_simd_cast(const simd_mask<_Up, _Ap>& __x)
  3081	  {
  3082	    return {__private_init, _Tp::abi_type::_MaskImpl::template _S_convert<
  3083				      typename _Tp::simd_type::value_type>(__x)};
  3084	  }
  3085	} // namespace __proposed
  3086	
  3087	// }}}
  3088	// to_fixed_size {{{2
  3089	template <typename _Tp, int _Np>
  3090	  _GLIBCXX_SIMD_INTRINSIC fixed_size_simd<_Tp, _Np>
  3091	  to_fixed_size(const fixed_size_simd<_Tp, _Np>& __x)
  3092	  { return __x; }
  3093	
  3094	template <typename _Tp, int _Np>
  3095	  _GLIBCXX_SIMD_INTRINSIC fixed_size_simd_mask<_Tp, _Np>
  3096	  to_fixed_size(const fixed_size_simd_mask<_Tp, _Np>& __x)
  3097	  { return __x; }
  3098	
  3099	template <typename _Tp, typename _Ap>
  3100	  _GLIBCXX_SIMD_INTRINSIC fixed_size_simd<_Tp, simd_size_v<_Tp, _Ap>>
  3101	  to_fixed_size(const simd<_Tp, _Ap>& __x)
  3102	  {
  3103	    using _Rp = fixed_size_simd<_Tp, simd_size_v<_Tp, _Ap>>;
  3104	    return _Rp([&__x](auto __i) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA { return __x[__i]; });
  3105	  }
  3106	
  3107	template <typename _Tp, typename _Ap>
  3108	  _GLIBCXX_SIMD_INTRINSIC fixed_size_simd_mask<_Tp, simd_size_v<_Tp, _Ap>>
  3109	  to_fixed_size(const simd_mask<_Tp, _Ap>& __x)
  3110	  {
  3111	    return {__private_init,
  3112		    [&](auto __i) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA { return __x[__i]; }};
  3113	  }
  3114	
  3115	// to_native {{{2
  3116	template <typename _Tp, int _Np>
  3117	  _GLIBCXX_SIMD_INTRINSIC
  3118	  enable_if_t<(_Np == native_simd<_Tp>::size()), native_simd<_Tp>>
  3119	  to_native(const fixed_size_simd<_Tp, _Np>& __x)
  3120	  {
  3121	    alignas(memory_alignment_v<native_simd<_Tp>>) _Tp __mem[_Np];
  3122	    __x.copy_to(__mem, vector_aligned);
  3123	    return {__mem, vector_aligned};
  3124	  }
  3125	
  3126	template <typename _Tp, int _Np>
  3127	  _GLIBCXX_SIMD_INTRINSIC
  3128	  enable_if_t<(_Np == native_simd_mask<_Tp>::size()), native_simd_mask<_Tp>>
  3129	  to_native(const fixed_size_simd_mask<_Tp, _Np>& __x)
  3130	  {
  3131	    return native_simd_mask<_Tp>(
  3132		     __private_init,
  3133		     [&](auto __i) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA { return __x[__i]; });
  3134	  }
  3135	
  3136	// to_compatible {{{2
  3137	template <typename _Tp, int _Np>
  3138	  _GLIBCXX_SIMD_INTRINSIC enable_if_t<(_Np == simd<_Tp>::size()), simd<_Tp>>
  3139	  to_compatible(const simd<_Tp, simd_abi::fixed_size<_Np>>& __x)
  3140	  {
  3141	    alignas(memory_alignment_v<simd<_Tp>>) _Tp __mem[_Np];
  3142	    __x.copy_to(__mem, vector_aligned);
  3143	    return {__mem, vector_aligned};
  3144	  }
  3145	
  3146	template <typename _Tp, int _Np>
  3147	  _GLIBCXX_SIMD_INTRINSIC
  3148	  enable_if_t<(_Np == simd_mask<_Tp>::size()), simd_mask<_Tp>>
  3149	  to_compatible(const simd_mask<_Tp, simd_abi::fixed_size<_Np>>& __x)
  3150	  {
  3151	    return simd_mask<_Tp>(
  3152		     __private_init,
  3153		     [&](auto __i) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA { return __x[__i]; });
  3154	  }
  3155	
  3156	// masked assignment [simd_mask.where] {{{1
  3157	
  3158	// where_expression {{{1
  3159	// const_where_expression<M, T> {{{2
      	// Read-only pairing of a mask (_M) with a value (_Tp). The object stores
      	// references only, so mask and value must outlive it. It is non-copyable,
      	// and its operations are callable on rvalues only (const&& qualifiers).
  3160	template <typename _M, typename _Tp>
  3161	  class const_where_expression
  3162	  {
  3163	    using _V = _Tp;
  3164	    static_assert(is_same_v<_V, __remove_cvref_t<_Tp>>);
  3165	
      	    // lets value_type below be _V itself when _V is an arithmetic type
  3166	    struct _Wrapper { using value_type = _V; };
  3167	
  3168	  protected:
  3169	    using _Impl = typename _V::_Impl;
  3170	
  3171	    using value_type =
  3172	      typename conditional_t<is_arithmetic_v<_V>, _Wrapper, _V>::value_type;
  3173	
      	    // Accessors for the stored mask and value, found by ADL.
  3174	    _GLIBCXX_SIMD_INTRINSIC friend const _M&
  3175	    __get_mask(const const_where_expression& __x)
  3176	    { return __x._M_k; }
  3177	
  3178	    _GLIBCXX_SIMD_INTRINSIC friend const _Tp&
  3179	    __get_lvalue(const const_where_expression& __x)
  3180	    { return __x._M_value; }
  3181	
  3182	    const _M& _M_k;
      	    // Non-const reference because where_expression derives from this class
      	    // and writes through it; this class itself never modifies the value.
  3183	    _Tp& _M_value;
  3184	
  3185	  public:
  3186	    const_where_expression(const const_where_expression&) = delete;
  3187	
  3188	    const_where_expression& operator=(const const_where_expression&) = delete;
  3189	
      	    // Binds mask __kk and value __dd. The parameter name is a reserved
      	    // identifier so that a user-defined macro cannot clash with it.
  3190	    _GLIBCXX_SIMD_INTRINSIC constexpr
  3191	    const_where_expression(const _M& __kk, const _Tp& __dd)
  3192	    : _M_k(__kk), _M_value(const_cast<_Tp&>(__dd)) {}
  3193	
      	    // Returns a new _V computed by _Impl::_S_masked_unary<negate> from the
      	    // mask and the value; the stored value is left untouched.
  3194	    _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR _V
  3195	    operator-() const&&
  3196	    {
  3197	      return {__private_init,
  3198		      _Impl::template _S_masked_unary<negate>(__data(_M_k),
  3199							      __data(_M_value))};
  3200	    }
  3201	
      	    // Returns a new _V computed by _Impl::_S_masked_load from the current
      	    // value, the mask and __mem (after _Flags::_S_apply); the stored value is
      	    // left untouched.
  3202	    template <typename _Up, typename _Flags>
  3203	      [[nodiscard]] _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR _V
  3204	      copy_from(const _LoadStorePtr<_Up, value_type>* __mem, _IsSimdFlagType<_Flags>) const&&
  3205	      {
  3206		return {__private_init,
  3207			_Impl::_S_masked_load(__data(_M_value), __data(_M_k),
  3208					      _Flags::template _S_apply<_V>(__mem))};
  3209	      }
  3210	
      	    // Stores to __mem (after _Flags::_S_apply) through _Impl::_S_masked_store,
      	    // passing the value and the mask.
  3211	    template <typename _Up, typename _Flags>
  3212	      _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR void
  3213	      copy_to(_LoadStorePtr<_Up, value_type>* __mem, _IsSimdFlagType<_Flags>) const&&
  3214	      {
  3215		_Impl::_S_masked_store(__data(_M_value),
  3216				       _Flags::template _S_apply<_V>(__mem),
  3217				       __data(_M_k));
  3218	      }
  3219	  };
  3220	
  3221	// const_where_expression<bool, T> {{{2
      	// Specialization for a plain bool mask: the whole value is either selected
      	// (_M_k true) or not. The mask is stored by value, the value by reference.
      	// Non-copyable; the operations are callable on rvalues only.
  3222	template <typename _Tp>
  3223	  class const_where_expression<bool, _Tp>
  3224	  {
  3225	    using _M = bool;
  3226	    using _V = _Tp;
  3227	
  3228	    static_assert(is_same_v<_V, __remove_cvref_t<_Tp>>);
  3229	
      	    // lets value_type below be _V itself when _V is an arithmetic type
  3230	    struct _Wrapper { using value_type = _V; };
  3231	
  3232	  protected:
  3233	    using value_type
  3234	      = typename conditional_t<is_arithmetic_v<_V>, _Wrapper, _V>::value_type;
  3235	
      	    // Accessors for the stored mask and value, found by ADL.
  3236	    _GLIBCXX_SIMD_INTRINSIC friend const _M&
  3237	    __get_mask(const const_where_expression& __x)
  3238	    { return __x._M_k; }
  3239	
  3240	    _GLIBCXX_SIMD_INTRINSIC friend const _Tp&
  3241	    __get_lvalue(const const_where_expression& __x)
  3242	    { return __x._M_value; }
  3243	
  3244	    const bool _M_k;
      	    // Non-const reference because where_expression<bool, _Tp> derives from
      	    // this class and writes through it.
  3245	    _Tp& _M_value;
  3246	
  3247	  public:
  3248	    const_where_expression(const const_where_expression&) = delete;
  3249	    const_where_expression& operator=(const const_where_expression&) = delete;
  3250	
      	    // Binds mask __kk and value __dd. The parameter name is a reserved
      	    // identifier so that a user-defined macro cannot clash with it.
  3251	    _GLIBCXX_SIMD_INTRINSIC constexpr
  3252	    const_where_expression(const bool __kk, const _Tp& __dd)
  3253	    : _M_k(__kk), _M_value(const_cast<_Tp&>(__dd)) {}
  3254	
      	    // Negated value if the mask is true, the unchanged value otherwise.
  3255	    _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR _V
  3256	    operator-() const&&
  3257	    { return _M_k ? -_M_value : _M_value; }
  3258	
      	    // __mem[0] converted to _V if the mask is true, the current value
      	    // otherwise. The flag argument only takes part in overload resolution.
  3259	    template <typename _Up, typename _Flags>
  3260	      [[nodiscard]] _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR _V
  3261	      copy_from(const _LoadStorePtr<_Up, value_type>* __mem, _IsSimdFlagType<_Flags>) const&&
  3262	      { return _M_k ? static_cast<_V>(__mem[0]) : _M_value; }
  3263	
      	    // Writes the value to __mem[0] if the mask is true; no-op otherwise.
  3264	    template <typename _Up, typename _Flags>
  3265	      _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR void
  3266	      copy_to(_LoadStorePtr<_Up, value_type>* __mem, _IsSimdFlagType<_Flags>) const&&
  3267	      {
  3268		if (_M_k)
  3269		  __mem[0] = _M_value;
  3270	      }
  3271	  };
  3272	
  3273	// where_expression<M, T> {{{2
      	// Writable counterpart of const_where_expression: adds masked assignment,
      	// compound assignment, increment/decrement and a copy_from that writes into
      	// the referenced value. All modifying operations are callable on rvalues
      	// only and return void. Mask and value must share ABI and element count.
  3274	template <typename _M, typename _Tp>
  3275	  class where_expression : public const_where_expression<_M, _Tp>
  3276	  {
  3277	    using _Impl = typename const_where_expression<_M, _Tp>::_Impl;
  3278	
  3279	    static_assert(!is_const<_Tp>::value,
  3280			  "where_expression may only be instantiated with a non-const "
  3281			  "_Tp parameter");
  3282	
  3283	    using typename const_where_expression<_M, _Tp>::value_type;
  3284	    using const_where_expression<_M, _Tp>::_M_k;
  3285	    using const_where_expression<_M, _Tp>::_M_value;
  3286	
  3287	    static_assert(
  3288	      is_same<typename _M::abi_type, typename _Tp::abi_type>::value, "");
  3289	    static_assert(_M::size() == _Tp::size(), "");
  3290	
      	    // Mutable access to the referenced value, found by ADL.
  3291	    _GLIBCXX_SIMD_INTRINSIC friend constexpr _Tp&
  3292	    __get_lvalue(where_expression& __x)
  3293	    { return __x._M_value; }
  3294	
  3295	  public:
  3296	    where_expression(const where_expression&) = delete;
  3297	    where_expression& operator=(const where_expression&) = delete;
  3298	
      	    // Binds mask __kk and value __dd. The parameter name is a reserved
      	    // identifier so that a user-defined macro cannot clash with it.
  3299	    _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR
  3300	    where_expression(const _M& __kk, _Tp& __dd)
  3301	    : const_where_expression<_M, _Tp>(__kk, __dd) {}
  3302	
      	    // Masked assignment: forwards the mask, the value's data and the
      	    // converted right-hand side to _Impl::_S_masked_assign.
  3303	    template <typename _Up>
  3304	      _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR void
  3305	      operator=(_Up&& __x) &&
  3306	      {
  3307		_Impl::_S_masked_assign(__data(_M_k), __data(_M_value),
  3308					__to_value_type_or_member_type<_Tp>(
  3309					  static_cast<_Up&&>(__x)));
  3310	      }
  3311	
      	    // Generates the masked compound assignment `operator __op=`. Each one
      	    // forwards to _Impl::_S_masked_cassign together with a callable that
      	    // invokes the implementation function __name on (lhs, rhs).
  3312	#define _GLIBCXX_SIMD_OP_(__op, __name)                                        \
  3313	  template <typename _Up>                                                      \
  3314	    _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR void                       \
  3315	    operator __op##=(_Up&& __x)&&                                              \
  3316	    {                                                                          \
  3317	      _Impl::template _S_masked_cassign(                                       \
  3318		__data(_M_k), __data(_M_value),                                        \
  3319		__to_value_type_or_member_type<_Tp>(static_cast<_Up&&>(__x)),          \
  3320		[](auto __impl, auto __lhs, auto __rhs)                                \
  3321		  constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA                         \
  3322		{ return __impl.__name(__lhs, __rhs); });                              \
  3323	    }                                                                          \
  3324	  static_assert(true)
  3325	    _GLIBCXX_SIMD_OP_(+, _S_plus);
  3326	    _GLIBCXX_SIMD_OP_(-, _S_minus);
  3327	    _GLIBCXX_SIMD_OP_(*, _S_multiplies);
  3328	    _GLIBCXX_SIMD_OP_(/, _S_divides);
  3329	    _GLIBCXX_SIMD_OP_(%, _S_modulus);
  3330	    _GLIBCXX_SIMD_OP_(&, _S_bit_and);
  3331	    _GLIBCXX_SIMD_OP_(|, _S_bit_or);
  3332	    _GLIBCXX_SIMD_OP_(^, _S_bit_xor);
  3333	    _GLIBCXX_SIMD_OP_(<<, _S_shift_left);
  3334	    _GLIBCXX_SIMD_OP_(>>, _S_shift_right);
  3335	#undef _GLIBCXX_SIMD_OP_
  3336	
      	    // Increment/decrement: the value's data is replaced by the result of
      	    // _Impl::_S_masked_unary<__increment> (or <__decrement>). The prefix and
      	    // postfix forms behave identically, and both return void.
  3337	    _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR void
  3338	    operator++() &&
  3339	    {
  3340	      __data(_M_value)
  3341		= _Impl::template _S_masked_unary<__increment>(__data(_M_k), __data(_M_value));
  3342	    }
  3343	
  3344	    _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR void
  3345	    operator++(int) &&
  3346	    {
  3347	      __data(_M_value)
  3348		= _Impl::template _S_masked_unary<__increment>(__data(_M_k), __data(_M_value));
  3349	    }
  3350	
  3351	    _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR void
  3352	    operator--() &&
  3353	    {
  3354	      __data(_M_value)
  3355		= _Impl::template _S_masked_unary<__decrement>(__data(_M_k), __data(_M_value));
  3356	    }
  3357	
  3358	    _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR void
  3359	    operator--(int) &&
  3360	    {
  3361	      __data(_M_value)
  3362		= _Impl::template _S_masked_unary<__decrement>(__data(_M_k), __data(_M_value));
  3363	    }
  3364	
      	    // Unlike the base class version, this writes the result of
      	    // _Impl::_S_masked_load back into the referenced value and returns void.
  3365	    // intentionally hides const_where_expression::copy_from
  3366	    template <typename _Up, typename _Flags>
  3367	      _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR void
  3368	      copy_from(const _LoadStorePtr<_Up, value_type>* __mem, _IsSimdFlagType<_Flags>) &&
  3369	      {
  3370		__data(_M_value) = _Impl::_S_masked_load(__data(_M_value), __data(_M_k),
  3371							 _Flags::template _S_apply<_Tp>(__mem));
  3372	      }
  3373	  };
  3374	
  3375	// where_expression<bool, T> {{{2
      	// Writable where-expression for a plain bool mask: every operation is applied
      	// to the referenced value if the mask is true and does nothing otherwise.
      	// All operations are callable on rvalues only and return void.
  3376	template <typename _Tp>
  3377	  class where_expression<bool, _Tp>
  3378	  : public const_where_expression<bool, _Tp>
  3379	  {
  3380	    using _M = bool;
  3381	    using typename const_where_expression<_M, _Tp>::value_type;
  3382	    using const_where_expression<_M, _Tp>::_M_k;
  3383	    using const_where_expression<_M, _Tp>::_M_value;
  3384	
  3385	  public:
  3386	    where_expression(const where_expression&) = delete;
  3387	    where_expression& operator=(const where_expression&) = delete;
  3388	
      	    // Binds mask __kk and value __dd. The parameter name is a reserved
      	    // identifier so that a user-defined macro cannot clash with it.
  3389	    _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR
  3390	    where_expression(const _M& __kk, _Tp& __dd)
  3391	    : const_where_expression<_M, _Tp>(__kk, __dd) {}
  3392	
      	    // Generates `operator __op`, which applies `_M_value __op x` if the mask
      	    // is true.
  3393	#define _GLIBCXX_SIMD_OP_(__op)                                                \
  3394	    template <typename _Up>                                                    \
  3395	      _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR void                     \
  3396	      operator __op(_Up&& __x)&&                                               \
  3397	      { if (_M_k) _M_value __op static_cast<_Up&&>(__x); }
  3398	
  3399	    _GLIBCXX_SIMD_OP_(=)
  3400	    _GLIBCXX_SIMD_OP_(+=)
  3401	    _GLIBCXX_SIMD_OP_(-=)
  3402	    _GLIBCXX_SIMD_OP_(*=)
  3403	    _GLIBCXX_SIMD_OP_(/=)
  3404	    _GLIBCXX_SIMD_OP_(%=)
  3405	    _GLIBCXX_SIMD_OP_(&=)
  3406	    _GLIBCXX_SIMD_OP_(|=)
  3407	    _GLIBCXX_SIMD_OP_(^=)
  3408	    _GLIBCXX_SIMD_OP_(<<=)
  3409	    _GLIBCXX_SIMD_OP_(>>=)
  3410	  #undef _GLIBCXX_SIMD_OP_
  3411	
      	    // The prefix and postfix forms both apply the prefix operator to the
      	    // value and return void.
  3412	    _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR void
  3413	    operator++() &&
  3414	    { if (_M_k) ++_M_value; }
  3415	
  3416	    _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR void
  3417	    operator++(int) &&
  3418	    { if (_M_k) ++_M_value; }
  3419	
  3420	    _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR void
  3421	    operator--() &&
  3422	    { if (_M_k) --_M_value; }
  3423	
  3424	    _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR void
  3425	    operator--(int) &&
  3426	    { if (_M_k) --_M_value; }
  3427	
      	    // Assigns __mem[0] to the value if the mask is true. The flag argument
      	    // only takes part in overload resolution.
  3428	    // intentionally hides const_where_expression::copy_from
  3429	    template <typename _Up, typename _Flags>
  3430	      _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR void
  3431	      copy_from(const _LoadStorePtr<_Up, value_type>* __mem, _IsSimdFlagType<_Flags>) &&
  3432	      { if (_M_k) _M_value = __mem[0]; }
  3433	  };
  3434	
  3435	// where {{{1
      	// where(mask, simd&): returns a where_expression constructed from
      	// {__k, __value}. The mask parameter is spelled via the nested
      	// simd<_Tp, _Ap>::mask_type.
  3436	template <typename _Tp, typename _Ap>
  3437	  _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR
  3438	  where_expression<simd_mask<_Tp, _Ap>, simd<_Tp, _Ap>>
  3439	  where(const typename simd<_Tp, _Ap>::mask_type& __k, simd<_Tp, _Ap>& __value)
  3440	  { return {__k, __value}; }
  3441	
      	// where(mask, const simd&): same as the non-const overload, but the value
      	// is const and the result is a const_where_expression.
  3442	template <typename _Tp, typename _Ap>
  3443	  _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR
  3444	  const_where_expression<simd_mask<_Tp, _Ap>, simd<_Tp, _Ap>>
  3445	  where(const typename simd<_Tp, _Ap>::mask_type& __k, const simd<_Tp, _Ap>& __value)
  3446	  { return {__k, __value}; }
  3447	
      	// where(mask, simd_mask&): the masked object is itself a simd_mask.
      	// The mask parameter type is wrapped in remove_const_t (presumably so that
      	// _Tp/_Ap are deduced from __value only -- confirm).
  3448	template <typename _Tp, typename _Ap>
  3449	  _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR
  3450	  where_expression<simd_mask<_Tp, _Ap>, simd_mask<_Tp, _Ap>>
  3451	  where(const remove_const_t<simd_mask<_Tp, _Ap>>& __k, simd_mask<_Tp, _Ap>& __value)
  3452	  { return {__k, __value}; }
  3453	
      	// where(mask, const simd_mask&): const counterpart of the overload above;
      	// returns a const_where_expression.
  3454	template <typename _Tp, typename _Ap>
  3455	  _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR
  3456	  const_where_expression<simd_mask<_Tp, _Ap>, simd_mask<_Tp, _Ap>>
  3457	  where(const remove_const_t<simd_mask<_Tp, _Ap>>& __k, const simd_mask<_Tp, _Ap>& __value)
  3458	  { return {__k, __value}; }
  3459	
      	// where(bool, T&): scalar form returning where_expression<bool, _Tp>.
      	// The mask is taken as _ExactBool (declared elsewhere; presumably accepts
      	// only arguments of exactly type bool -- confirm).
  3460	template <typename _Tp>
  3461	  _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR where_expression<bool, _Tp>
  3462	  where(_ExactBool __k, _Tp& __value)
  3463	  { return {__k, __value}; }
  3464	
      	// where(bool, const T&): const scalar form returning
      	// const_where_expression<bool, _Tp>.
  3465	template <typename _Tp>
  3466	  _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR const_where_expression<bool, _Tp>
  3467	  where(_ExactBool __k, const _Tp& __value)
  3468	  { return {__k, __value}; }
  3469	
      	// Deleted: a plain bool mask combined with a (non-const) simd value is
      	// rejected at compile time.
  3470	template <typename _Tp, typename _Ap>
  3471	  _GLIBCXX_SIMD_CONSTEXPR void
  3472	  where(bool __k, simd<_Tp, _Ap>& __value) = delete;
  3473	
      	// Deleted: a plain bool mask combined with a const simd value is rejected
      	// at compile time.
  3474	template <typename _Tp, typename _Ap>
  3475	  _GLIBCXX_SIMD_CONSTEXPR void
  3476	  where(bool __k, const simd<_Tp, _Ap>& __value) = delete;
  3477	
  3478	// proposed mask iterations {{{1
      	// where_range<N> wraps a bitset<N>; iterating it yields the indices of the
      	// set bits, lowest index first. where(mask) builds such a range from a
      	// simd_mask.
  3479	namespace __proposed {
  3480	template <size_t _Np>
  3481	  class where_range
  3482	  {
  3483	    const bitset<_Np> __bits;
  3484	
  3485	  public:
  3486	    where_range(bitset<_Np> __b) : __bits(__b) {}
  3487	
      	    // Iterator state: __mask holds the bits not yet visited, __bit the index
      	    // of the lowest set bit of __mask (0 when __mask is empty).
  3488	    class iterator
  3489	    {
      	      // Same width as the value returned by bitset::to_ullong(), so no bits
      	      // are lost on targets where size_t is narrower than 64 bits.
  3490	      unsigned long long __mask;
  3491	      size_t __bit;
  3492	
      	      // __builtin_ctzll has an undefined result for 0, therefore an empty
      	      // mask (end iterator, or after the last increment) is handled
      	      // explicitly.
  3493	      _GLIBCXX_SIMD_INTRINSIC void
  3494	      __next_bit()
  3495	      { __bit = __mask == 0 ? 0 : __builtin_ctzll(__mask); }
  3496	
      	      // Clears the lowest set bit of __mask.
  3497	      _GLIBCXX_SIMD_INTRINSIC void
  3498	      __reset_lsb()
  3499	      {
  3500		// 01100100 - 1 = 01100011
  3501		__mask &= (__mask - 1);
  3502		// __asm__("btr %1,%0" : "+r"(__mask) : "r"(__bit));
  3503	      }
  3504	
  3505	    public:
  3506	      iterator(decltype(__mask) __m) : __mask(__m) { __next_bit(); }
  3507	      iterator(const iterator&) = default;
  3508	      iterator(iterator&&) = default;
  3509	
      	      // Both accessors return the current bit index by value.
  3510	      _GLIBCXX_SIMD_ALWAYS_INLINE size_t
  3511	      operator->() const
  3512	      { return __bit; }
  3513	
  3514	      _GLIBCXX_SIMD_ALWAYS_INLINE size_t
  3515	      operator*() const
  3516	      { return __bit; }
  3517	
      	      // Advance: drop the current bit, then locate the next one.
  3518	      _GLIBCXX_SIMD_ALWAYS_INLINE iterator&
  3519	      operator++()
  3520	      {
  3521		__reset_lsb();
  3522		__next_bit();
  3523		return *this;
  3524	      }
  3525	
  3526	      _GLIBCXX_SIMD_ALWAYS_INLINE iterator
  3527	      operator++(int)
  3528	      {
  3529		iterator __tmp = *this;
  3530		__reset_lsb();
  3531		__next_bit();
  3532		return __tmp;
  3533	      }
  3534	
      	      // Iterators compare by the set of remaining bits only.
  3535	      _GLIBCXX_SIMD_ALWAYS_INLINE bool
  3536	      operator==(const iterator& __rhs) const
  3537	      { return __mask == __rhs.__mask; }
  3538	
  3539	      _GLIBCXX_SIMD_ALWAYS_INLINE bool
  3540	      operator!=(const iterator& __rhs) const
  3541	      { return __mask != __rhs.__mask; }
  3542	    };
  3543	
      	    // begin(): iterator over all bits of the stored bitset.
  3544	    iterator
  3545	    begin() const
  3546	    { return __bits.to_ullong(); }
  3547	
      	    // end(): iterator with no bits left.
  3548	    iterator
  3549	    end() const
  3550	    { return 0; }
  3551	  };
  3552	
      	// Builds a where_range from the bitset representation of the mask __k.
  3553	template <typename _Tp, typename _Ap>
  3554	  where_range<simd_size_v<_Tp, _Ap>>
  3555	  where(const simd_mask<_Tp, _Ap>& __k)
  3556	  { return __k.__to_bitset(); }
  3557	
  3558	} // namespace __proposed
  3559	
  3560	// }}}1
  3561	// reductions [simd.reductions] {{{1
      	// reduce(simd, op): forwards to the ABI's _S_reduce with the given binary
      	// operation; the operation defaults to plus<>.
  3562	template <typename _Tp, typename _Abi, typename _BinaryOperation = plus<>>
  3563	  _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR _Tp
  3564	  reduce(const simd<_Tp, _Abi>& __v, _BinaryOperation __binary_op = _BinaryOperation())
  3565	  { return _Abi::_SimdImpl::_S_reduce(__v, __binary_op); }
  3566	
      	// reduce(where-expression, identity, op): masked reduction.
      	// Returns __identity_element directly if no mask element is set (hinted as
      	// the unlikely case). Otherwise a temporary is filled with the identity,
      	// _S_masked_assign is applied with the expression's mask and value
      	// (presumably overwriting the selected elements -- confirm), and the
      	// temporary is reduced with __binary_op.
  3567	template <typename _M, typename _V, typename _BinaryOperation = plus<>>
  3568	  _GLIBCXX_SIMD_INTRINSIC typename _V::value_type
  3569	  reduce(const const_where_expression<_M, _V>& __x,
  3570		 typename _V::value_type __identity_element, _BinaryOperation __binary_op)
  3571	  {
  3572	    if (__builtin_expect(none_of(__get_mask(__x)), false))
  3573	      return __identity_element;
  3574	
  3575	    _V __tmp = __identity_element;
  3576	    _V::_Impl::_S_masked_assign(__data(__get_mask(__x)), __data(__tmp),
  3577					__data(__get_lvalue(__x)));
  3578	    return reduce(__tmp, __binary_op);
  3579	  }
  3580	
      	// Masked sum: calls the three-argument reduce with identity 0.
  3581	template <typename _M, typename _V>
  3582	  _GLIBCXX_SIMD_INTRINSIC typename _V::value_type
  3583	  reduce(const const_where_expression<_M, _V>& __x, plus<> __binary_op = {})
  3584	  { return reduce(__x, 0, __binary_op); }
  3585	
      	// Masked product: calls the three-argument reduce with identity 1.
  3586	template <typename _M, typename _V>
  3587	  _GLIBCXX_SIMD_INTRINSIC typename _V::value_type
  3588	  reduce(const const_where_expression<_M, _V>& __x, multiplies<> __binary_op)
  3589	  { return reduce(__x, 1, __binary_op); }
  3590	
      	// Masked bitwise AND: the identity is the bitwise complement of a
      	// value-initialized value_type.
  3591	template <typename _M, typename _V>
  3592	  _GLIBCXX_SIMD_INTRINSIC typename _V::value_type
  3593	  reduce(const const_where_expression<_M, _V>& __x, bit_and<> __binary_op)
  3594	  { return reduce(__x, ~typename _V::value_type(), __binary_op); }
  3595	
      	// Masked bitwise OR: calls the three-argument reduce with identity 0.
  3596	template <typename _M, typename _V>
  3597	  _GLIBCXX_SIMD_INTRINSIC typename _V::value_type
  3598	  reduce(const const_where_expression<_M, _V>& __x, bit_or<> __binary_op)
  3599	  { return reduce(__x, 0, __binary_op); }
  3600	
      	// Masked bitwise XOR: calls the three-argument reduce with identity 0.
  3601	template <typename _M, typename _V>
  3602	  _GLIBCXX_SIMD_INTRINSIC typename _V::value_type
  3603	  reduce(const const_where_expression<_M, _V>& __x, bit_xor<> __binary_op)
  3604	  { return reduce(__x, 0, __binary_op); }
  3605	
      	// hmin(simd): reduces __v with __detail::_Minimum via the ABI's _S_reduce.
  3606	template <typename _Tp, typename _Abi>
  3607	  _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR _Tp
  3608	  hmin(const simd<_Tp, _Abi>& __v) noexcept
  3609	  { return _Abi::_SimdImpl::_S_reduce(__v, __detail::_Minimum()); }
  3610	
      	// hmax(simd): reduces __v with __detail::_Maximum via the ABI's _S_reduce.
  3611	template <typename _Tp, typename _Abi>
  3612	  _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR _Tp
  3613	  hmax(const simd<_Tp, _Abi>& __v) noexcept
  3614	  { return _Abi::_SimdImpl::_S_reduce(__v, __detail::_Maximum()); }
  3615	
      	// hmin(where-expression): masked minimum.
      	// A temporary is filled with an identity element, _S_masked_assign is
      	// applied with the expression's mask and value, and the temporary is
      	// reduced with __detail::_Minimum.
  3616	template <typename _M, typename _V>
  3617	  _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR
  3618	  typename _V::value_type
  3619	  hmin(const const_where_expression<_M, _V>& __x) noexcept
  3620	  {
  3621	    using _Tp = typename _V::value_type;
      	    // Identity: the largest finite value, or (in the #else branch) infinity
      	    // when the type has one.
      	    // NOTE(review): GCC appears to predefine __FINITE_MATH_ONLY__ (as 0 or
      	    // 1) unconditionally; if so, `#ifdef` always selects the first branch
      	    // and the #else branch is never compiled. Confirm whether `#if` was
      	    // intended.
  3622	    constexpr _Tp __id_elem =
  3623	#ifdef __FINITE_MATH_ONLY__
  3624	      __finite_max_v<_Tp>;
  3625	#else
  3626	      __value_or<__infinity, _Tp>(__finite_max_v<_Tp>);
  3627	#endif
  3628	    _V __tmp = __id_elem;
  3629	    _V::_Impl::_S_masked_assign(__data(__get_mask(__x)), __data(__tmp),
  3630					__data(__get_lvalue(__x)));
  3631	    return _V::abi_type::_SimdImpl::_S_reduce(__tmp, __detail::_Minimum());
  3632	  }
  3633	
      	// hmax(where-expression): masked maximum.
      	// A temporary is filled with an identity element, _S_masked_assign is
      	// applied with the expression's mask and value, and the temporary is
      	// reduced with __detail::_Maximum.
  3634	template <typename _M, typename _V>
  3635	  _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR
  3636	  typename _V::value_type
  3637	  hmax(const const_where_expression<_M, _V>& __x) noexcept
  3638	  {
  3639	    using _Tp = typename _V::value_type;
      	    // Identity: __finite_min_v, or (in the #else branch) negative infinity
      	    // when the type has an infinity.
      	    // NOTE(review): GCC appears to predefine __FINITE_MATH_ONLY__ (as 0 or
      	    // 1) unconditionally; if so, `#ifdef` always selects the first branch
      	    // and the #else branch is never compiled. Confirm whether `#if` was
      	    // intended.
  3640	    constexpr _Tp __id_elem =
  3641	#ifdef __FINITE_MATH_ONLY__
  3642	      __finite_min_v<_Tp>;
  3643	#else
  3644	      [] {
  3645		if constexpr (__value_exists_v<__infinity, _Tp>)
  3646		  return -__infinity_v<_Tp>;
  3647		else
  3648		  return __finite_min_v<_Tp>;
  3649	      }();
  3650	#endif
  3651	    _V __tmp = __id_elem;
  3652	    _V::_Impl::_S_masked_assign(__data(__get_mask(__x)), __data(__tmp),
  3653					__data(__get_lvalue(__x)));
  3654	    return _V::abi_type::_SimdImpl::_S_reduce(__tmp, __detail::_Maximum());
  3655	  }
  3656	
  3657	// }}}1
  3658	// algorithms [simd.alg] {{{
      	// min(a, b): wraps the result of the ABI's _S_min on the two data members
      	// in a new simd object.
  3659	template <typename _Tp, typename _Ap>
  3660	  _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR simd<_Tp, _Ap>
  3661	  min(const simd<_Tp, _Ap>& __a, const simd<_Tp, _Ap>& __b)
  3662	  { return simd<_Tp, _Ap>(__private_init, _Ap::_SimdImpl::_S_min(__data(__a), __data(__b))); }
  3663	
      	// max(a, b): wraps the result of the ABI's _S_max on the two data members
      	// in a new simd object.
  3664	template <typename _Tp, typename _Ap>
  3665	  _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR simd<_Tp, _Ap>
  3666	  max(const simd<_Tp, _Ap>& __a, const simd<_Tp, _Ap>& __b)
  3667	  { return simd<_Tp, _Ap>(__private_init, _Ap::_SimdImpl::_S_max(__data(__a), __data(__b))); }
  3668	
      	// minmax(a, b): calls the ABI's _S_minmax once and wraps the two members
      	// of the returned pair (.first, .second) in simd objects.
      	// The local variable uses a reserved (`__`-prefixed) name so that it
      	// cannot clash with user-defined macros.
  3669	template <typename _Tp, typename _Ap>
  3670	  _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR
  3671	  pair<simd<_Tp, _Ap>, simd<_Tp, _Ap>>
  3672	  minmax(const simd<_Tp, _Ap>& __a, const simd<_Tp, _Ap>& __b)
  3673	  {
  3674	    const auto __pair_of_members
  3675	      = _Ap::_SimdImpl::_S_minmax(__data(__a), __data(__b));
  3676	    return {simd<_Tp, _Ap>(__private_init, __pair_of_members.first),
  3677		    simd<_Tp, _Ap>(__private_init, __pair_of_members.second)};
  3678	  }
  3679	
      	// clamp(v, lo, hi): computes min(hi, max(lo, v)) on the data members and
      	// wraps the result in a new simd object.
  3680	template <typename _Tp, typename _Ap>
  3681	  _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR simd<_Tp, _Ap>
  3682	  clamp(const simd<_Tp, _Ap>& __v, const simd<_Tp, _Ap>& __lo, const simd<_Tp, _Ap>& __hi)
  3683	  {
  3684	    using _Impl = typename _Ap::_SimdImpl;
  3685	    return simd<_Tp, _Ap>(
  3686		     __private_init,
  3687		     _Impl::_S_min(__data(__hi), _Impl::_S_max(__data(__lo), __data(__v))));
  3688	  }
  3689	
  3690	// }}}
  3691	
      	// Forward declaration of split<_Sizes...>(simd): only participates when
      	// the sizes add up to the size of the input simd. Returns a tuple with one
      	// simd (ABI chosen by simd_abi::deduce_t) per requested size.
  3692	template <size_t... _Sizes, typename _Tp, typename _Ap,
  3693		  typename = enable_if_t<((_Sizes + ...) == simd<_Tp, _Ap>::size())>>
  3694	  inline tuple<simd<_Tp, simd_abi::deduce_t<_Tp, _Sizes>>...>
  3695	  split(const simd<_Tp, _Ap>&);
  3696	
  3697	// __extract_part {{{
      	// Declaration only. The result holds _Np / _Total * _Combine elements;
      	// _Combine defaults to 1.
  3698	template <int _Index, int _Total, int _Combine = 1, typename _Tp, size_t _Np>
  3699	  _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_CONST constexpr
  3700	  _SimdWrapper<_Tp, _Np / _Total * _Combine>
  3701	  __extract_part(const _SimdWrapper<_Tp, _Np> __x);
  3702	
      	// Declaration only: overload taking a _SimdTuple; the return type is
      	// deduced from the definition, which is not part of this section.
  3703	template <int _Index, int _Parts, int _Combine = 1, typename _Tp, typename _A0, typename... _As>
  3704	  _GLIBCXX_SIMD_INTRINSIC constexpr auto
  3705	  __extract_part(const _SimdTuple<_Tp, _A0, _As...>& __x);
  3706	
  3707	// }}}
  3708	// _SizeList {{{
      	// Compile-time list of size_t values (_V0 followed by _Values...).
  3709	template <size_t _V0, size_t... _Values>
  3710	  struct _SizeList
  3711	  {
      	    // Returns the _I-th value of the list (index 0 is _V0); recurses into
      	    // the tail for _I > 0.
  3712	    template <size_t _I>
  3713	      static constexpr size_t
  3714	      _S_at(_SizeConstant<_I> = {})
  3715	      {
  3716		if constexpr (_I == 0)
  3717		  return _V0;
  3718		else
  3719		  return _SizeList<_Values...>::template _S_at<_I - 1>();
  3720	      }
  3721	
      	    // Returns, as a _SizeConstant, the sum of the first _I values, i.e. the
      	    // sum of all entries before index _I (0 for _I == 0).
  3722	    template <size_t _I>
  3723	      static constexpr auto
  3724	      _S_before(_SizeConstant<_I> = {})
  3725	      {
  3726		if constexpr (_I == 0)
  3727		  return _SizeConstant<0>();
  3728		else
  3729		  return _SizeConstant<
  3730		    _V0 + _SizeList<_Values...>::template _S_before<_I - 1>()>();
  3731	      }
  3732	
      	    // Returns a (value-initialized) _SizeList with the first _Np entries
      	    // removed; for _Np == 0 this is the list itself.
  3733	    template <size_t _Np>
  3734	      static constexpr auto
  3735	      _S_pop_front(_SizeConstant<_Np> = {})
  3736	      {
  3737		if constexpr (_Np == 0)
  3738		  return _SizeList();
  3739		else
  3740		  return _SizeList<_Values...>::template _S_pop_front<_Np - 1>();
  3741	      }
  3742	  };
  3743	
  3744	// }}}
  3745	// __extract_center {{{
      	// Returns the middle half of __x: with __x viewed as four quarters
      	// x0 x1 x2 x3, the result is {x1, x2}. Requires _Np to be a multiple of 4
      	// (and at least 4). x86 targets use shuffle/alignr intrinsics where the
      	// size conditions below match; otherwise the bytes are copied with memcpy.
  3746	template <typename _Tp, size_t _Np>
  3747	  _GLIBCXX_SIMD_INTRINSIC _SimdWrapper<_Tp, _Np / 2>
  3748	  __extract_center(_SimdWrapper<_Tp, _Np> __x)
  3749	  {
  3750	    static_assert(_Np >= 4);
  3751	    static_assert(_Np % 4 == 0); // x0 - x1 - x2 - x3 -> return {x1, x2}
  3752	#if _GLIBCXX_SIMD_X86INTRIN    // {{{
      	    // 64-byte input with AVX-512F: shuffle 128-bit lanes, keep the low 256
      	    // bits of the shuffle result.
  3753	    if constexpr (__have_avx512f && sizeof(_Tp) * _Np == 64)
  3754	      {
  3755		const auto __intrin = __to_intrin(__x);
  3756		if constexpr (is_integral_v<_Tp>)
  3757		  return __vector_bitcast<_Tp>(_mm512_castsi512_si256(
  3758		    _mm512_shuffle_i32x4(__intrin, __intrin,
  3759					 1 + 2 * 0x4 + 2 * 0x10 + 3 * 0x40)));
  3760		else if constexpr (sizeof(_Tp) == 4)
  3761		  return __vector_bitcast<_Tp>(_mm512_castps512_ps256(
  3762		    _mm512_shuffle_f32x4(__intrin, __intrin,
  3763					 1 + 2 * 0x4 + 2 * 0x10 + 3 * 0x40)));
  3764		else if constexpr (sizeof(_Tp) == 8)
  3765		  return __vector_bitcast<_Tp>(_mm512_castpd512_pd256(
  3766		    _mm512_shuffle_f64x2(__intrin, __intrin,
  3767					 1 + 2 * 0x4 + 2 * 0x10 + 3 * 0x40)));
  3768		else
  3769		  __assert_unreachable<_Tp>();
  3770	      }
      	    // 32-byte floating-point input: combine the two 128-bit halves with
      	    // _mm_shuffle_pd.
  3771	    else if constexpr (sizeof(_Tp) * _Np == 32 && is_floating_point_v<_Tp>)
  3772	      return __vector_bitcast<_Tp>(
  3773		_mm_shuffle_pd(__lo128(__vector_bitcast<double>(__x)),
  3774			       __hi128(__vector_bitcast<double>(__x)), 1));
      	    // 32-byte storage otherwise: byte-wise alignr of high and low halves,
      	    // shifted by one quarter of the element bytes.
  3775	    else if constexpr (sizeof(__x) == 32 && sizeof(_Tp) * _Np <= 32)
  3776	      return __vector_bitcast<_Tp>(
  3777		_mm_alignr_epi8(__hi128(__vector_bitcast<_LLong>(__x)),
  3778				__lo128(__vector_bitcast<_LLong>(__x)),
  3779				sizeof(_Tp) * _Np / 4));
  3780	    else
  3781	#endif // _GLIBCXX_SIMD_X86INTRIN }}}
  3782	      {
      		// Generic fallback: copy _Np / 2 elements starting at a byte offset
      		// of one quarter of the input.
  3783		__vector_type_t<_Tp, _Np / 2> __r;
  3784		__builtin_memcpy(&__r,
  3785				 reinterpret_cast<const char*>(&__x)
  3786				   + sizeof(_Tp) * _Np / 4,
  3787				 sizeof(_Tp) * _Np / 2);
  3788		return __r;
  3789	      }
  3790	  }
  3791	
      	// _SimdTuple overload: a tuple with a single entry forwards to the
      	// _SimdWrapper overload on that entry; otherwise the center is obtained as
      	// __extract_part<1, 4, 2> (index 1 of 4 parts, combining 2 parts).
  3792	template <typename _Tp, typename _A0, typename... _As>
  3793	  _GLIBCXX_SIMD_INTRINSIC
  3794	  _SimdWrapper<_Tp, _SimdTuple<_Tp, _A0, _As...>::_S_size() / 2>
  3795	  __extract_center(const _SimdTuple<_Tp, _A0, _As...>& __x)
  3796	  {
  3797	    if constexpr (sizeof...(_As) == 0)
  3798	      return __extract_center(__x.first);
  3799	    else
  3800	      return __extract_part<1, 4, 2>(__x);
  3801	  }
  3802	
  3803	// }}}
  3804	// __split_wrapper {{{
      	// Turns a _SizeList argument back into an explicit _Sizes... pack: wraps
      	// the tuple __x in a fixed_size_simd and calls split<_Sizes...> on it.
  3805	template <size_t... _Sizes, typename _Tp, typename... _As>
  3806	  auto
  3807	  __split_wrapper(_SizeList<_Sizes...>, const _SimdTuple<_Tp, _As...>& __x)
  3808	  {
  3809	    return split<_Sizes...>(
  3810	      fixed_size_simd<_Tp, _SimdTuple<_Tp, _As...>::_S_size()>(__private_init,
  3811								       __x));
  3812	  }
  3813	
  3814	// }}}
  3815	
  3816	// split<simd>(simd) {{{
      	// Splits __x into _Parts equally sized pieces of type _V and returns them
      	// as array<_V, _Parts>. Only enabled if _V is a simd and the input size is
      	// exactly _Parts * _V::size().
  3817	template <typename _V, typename _Ap,
  3818		  size_t _Parts = simd_size_v<typename _V::value_type, _Ap> / _V::size()>
  3819	  enable_if_t<simd_size_v<typename _V::value_type, _Ap> == _Parts * _V::size()
  3820			&& is_simd_v<_V>, array<_V, _Parts>>
  3821	  split(const simd<typename _V::value_type, _Ap>& __x)
  3822	  {
  3823	    using _Tp = typename _V::value_type;
      	    // A single part is just a cast of the whole input.
  3824	    if constexpr (_Parts == 1)
  3825	      {
  3826		return {simd_cast<_V>(__x)};
  3827	      }
      	    // Note: a plain (non-constexpr) `if`. When _M_is_constprop() holds, each
      	    // part is built element by element through a generator.
  3828	    else if (__x._M_is_constprop())
  3829	      {
  3830		return __generate_from_n_evaluations<_Parts, array<_V, _Parts>>(
  3831			 [&](auto __i) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
  3832			   return _V([&](auto __j) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA
  3833				     { return __x[__i * _V::size() + __j]; });
  3834			 });
  3835	      }
  3836	    else if constexpr (
  3837	      __is_fixed_size_abi_v<_Ap>
  3838	      && (is_same_v<typename _V::abi_type, simd_abi::scalar>
  3839		|| (__is_fixed_size_abi_v<typename _V::abi_type>
  3840		  && sizeof(_V) == sizeof(_Tp) * _V::size() // _V doesn't have padding
  3841		  )))
  3842	      {
  3843		// fixed_size -> fixed_size (w/o padding) or scalar
      		// Either load each part from the input's storage through an aliasing
      		// pointer, or build it by subscripting the storage with constants.
  3844	#ifdef _GLIBCXX_SIMD_USE_ALIASING_LOADS
  3845	      const __may_alias<_Tp>* const __element_ptr
  3846		= reinterpret_cast<const __may_alias<_Tp>*>(&__data(__x));
  3847	      return __generate_from_n_evaluations<_Parts, array<_V, _Parts>>(
  3848		       [&](auto __i) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA
  3849		       { return _V(__element_ptr + __i * _V::size(), vector_aligned); });
  3850	#else
  3851	      const auto& __xx = __data(__x);
  3852	      return __generate_from_n_evaluations<_Parts, array<_V, _Parts>>(
  3853		       [&](auto __i) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
  3854			 [[maybe_unused]] constexpr size_t __offset
  3855			   = decltype(__i)::value * _V::size();
  3856			 return _V([&](auto __j) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
  3857				  constexpr _SizeConstant<__j + __offset> __k;
  3858				  return __xx[__k];
  3859				});
  3860		       });
  3861	#endif
  3862	    }
      	  // Scalar parts from a non-fixed-size input: one element per part.
  3863	  else if constexpr (is_same_v<typename _V::abi_type, simd_abi::scalar>)
  3864	    {
  3865	      // normally memcpy should work here as well
  3866	      return __generate_from_n_evaluations<_Parts, array<_V, _Parts>>(
  3867		       [&](auto __i) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA { return __x[__i]; });
  3868	    }
      	  // Remaining cases: fixed_size parts are generated element-wise; all other
      	  // parts are taken from the input's data with __extract_part.
  3869	  else
  3870	    {
  3871	      return __generate_from_n_evaluations<_Parts, array<_V, _Parts>>(
  3872		       [&](auto __i) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
  3873			 if constexpr (__is_fixed_size_abi_v<typename _V::abi_type>)
  3874			   return _V([&](auto __j) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
  3875				    return __x[__i * _V::size() + __j];
  3876				  });
  3877			 else
  3878			   return _V(__private_init,
  3879				     __extract_part<decltype(__i)::value, _Parts>(__data(__x)));
  3880		       });
  3881	    }
  3882	  }
  3883	
  3884	// }}}
  3885	// split<simd_mask>(simd_mask) {{{
      	// Splits the mask __x into _Parts masks of type _V, returned as
      	// array<_V, _Parts>. Only enabled if _V is a simd_mask and the input size
      	// is exactly _Parts * _V::size().
  3886	template <typename _V, typename _Ap,
  3887		  size_t _Parts = simd_size_v<typename _V::simd_type::value_type, _Ap> / _V::size()>
  3888	  enable_if_t<is_simd_mask_v<_V> && simd_size_v<typename
  3889	    _V::simd_type::value_type, _Ap> == _Parts * _V::size(), array<_V, _Parts>>
  3890	  split(const simd_mask<typename _V::simd_type::value_type, _Ap>& __x)
  3891	  {
      	    // Same ABI: nothing to split, return the input as the only element.
  3892	    if constexpr (is_same_v<_Ap, typename _V::abi_type>)
  3893	      return {__x};
      	    // One part with a different ABI: convert with static_simd_cast.
  3894	    else if constexpr (_Parts == 2 - 1)
  3895	      return {__proposed::static_simd_cast<_V>(__x)};
      	    // AVX mask into two SSE masks: take the low and high 128-bit halves.
  3896	    else if constexpr (_Parts == 2 && __is_sse_abi<typename _V::abi_type>()
  3897			       && __is_avx_abi<_Ap>())
  3898	      return {_V(__private_init, __lo128(__data(__x))),
  3899		      _V(__private_init, __hi128(__data(__x)))};
      	    // Each part fits into an unsigned long long: go through the bitset
      	    // representation, shifting by the part's offset.
  3900	    else if constexpr (_V::size() <= __CHAR_BIT__ * sizeof(_ULLong))
  3901	      {
  3902		const bitset __bits = __x.__to_bitset();
  3903		return __generate_from_n_evaluations<_Parts, array<_V, _Parts>>(
  3904			 [&](auto __i) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
  3905			   constexpr size_t __offset = __i * _V::size();
  3906			   return _V(__bitset_init, (__bits >> __offset).to_ullong());
  3907			 });
  3908	      }
      	    // Otherwise: build every part element by element.
  3909	    else
  3910	      {
  3911		return __generate_from_n_evaluations<_Parts, array<_V, _Parts>>(
  3912			 [&](auto __i) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
  3913			   constexpr size_t __offset = __i * _V::size();
  3914			   return _V(__private_init,
  3915				     [&](auto __j) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
  3916				       return __x[__j + __offset];
  3917				     });
  3918			 });
  3919	      }
  3920	  }
  3921	
  3922	// }}}
  3923	// split<_Sizes...>(simd) {{{
      	// Definition of the split<_Sizes...> overload declared earlier: splits __x
      	// into consecutive pieces of the given sizes and returns them as a tuple of
      	// deduced-ABI simd objects. The branches below are shortcuts for specific
      	// layouts; anything they do not cover falls through to the generic code at
      	// the end of the function.
  3924	template <size_t... _Sizes, typename _Tp, typename _Ap, typename>
  3925	  _GLIBCXX_SIMD_ALWAYS_INLINE
  3926	  tuple<simd<_Tp, simd_abi::deduce_t<_Tp, _Sizes>>...>
  3927	  split(const simd<_Tp, _Ap>& __x)
  3928	  {
  3929	    using _SL = _SizeList<_Sizes...>;
  3930	    using _Tuple = tuple<__deduced_simd<_Tp, _Sizes>...>;
  3931	    constexpr size_t _Np = simd_size_v<_Tp, _Ap>;
  3932	    constexpr size_t _N0 = _SL::template _S_at<0>();
  3933	    using _V = __deduced_simd<_Tp, _N0>;
  3934	
      	    // Plain (non-constexpr) `if`: when _M_is_constprop() holds, generate
      	    // every piece element-wise, starting at the sum of the preceding sizes.
  3935	    if (__x._M_is_constprop())
  3936	      return __generate_from_n_evaluations<sizeof...(_Sizes), _Tuple>(
  3937		       [&](auto __i) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
  3938			 using _Vi = __deduced_simd<_Tp, _SL::_S_at(__i)>;
  3939			 constexpr size_t __offset = _SL::_S_before(__i);
  3940			 return _Vi([&](auto __j) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
  3941				  return __x[__offset + __j];
  3942				});
  3943		       });
      	    // The first (and, as asserted, only) size covers the whole input.
  3944	    else if constexpr (_Np == _N0)
  3945	      {
  3946		static_assert(sizeof...(_Sizes) == 1);
  3947		return {simd_cast<_V>(__x)};
  3948	      }
  3949	    else if constexpr // split from fixed_size, such that __x::first.size == _N0
  3950	      (__is_fixed_size_abi_v<
  3951		 _Ap> && __fixed_size_storage_t<_Tp, _Np>::_S_first_size == _N0)
  3952	      {
  3953		static_assert(
  3954		  !__is_fixed_size_abi_v<typename _V::abi_type>,
  3955		  "How can <_Tp, _Np> be __a single _SimdTuple entry but __a "
  3956		  "fixed_size_simd "
  3957		  "when deduced?");
  3958		// extract first and recurse (__split_wrapper is needed to deduce a new
  3959		// _Sizes pack)
  3960		return tuple_cat(make_tuple(_V(__private_init, __data(__x).first)),
  3961				 __split_wrapper(_SL::template _S_pop_front<1>(),
  3962						 __data(__x).second));
  3963	      }
      	    // No requested piece deduces to the scalar or a fixed_size ABI: handle
      	    // the common halves/thirds/quarters layouts with __extract_part.
  3964	    else if constexpr ((!is_same_v<simd_abi::scalar,
  3965					   simd_abi::deduce_t<_Tp, _Sizes>> && ...)
  3966			       && (!__is_fixed_size_abi_v<
  3967				     simd_abi::deduce_t<_Tp, _Sizes>> && ...))
  3968	      {
  3969		if constexpr (((_Sizes * 2 == _Np) && ...))
  3970		  return {{__private_init, __extract_part<0, 2>(__data(__x))},
  3971			  {__private_init, __extract_part<1, 2>(__data(__x))}};
  3972		else if constexpr (is_same_v<_SizeList<_Sizes...>,
  3973					     _SizeList<_Np / 3, _Np / 3, _Np / 3>>)
  3974		  return {{__private_init, __extract_part<0, 3>(__data(__x))},
  3975			  {__private_init, __extract_part<1, 3>(__data(__x))},
  3976			  {__private_init, __extract_part<2, 3>(__data(__x))}};
  3977		else if constexpr (is_same_v<_SizeList<_Sizes...>,
  3978					     _SizeList<2 * _Np / 3, _Np / 3>>)
  3979		  return {{__private_init, __extract_part<0, 3, 2>(__data(__x))},
  3980			  {__private_init, __extract_part<2, 3>(__data(__x))}};
  3981		else if constexpr (is_same_v<_SizeList<_Sizes...>,
  3982					     _SizeList<_Np / 3, 2 * _Np / 3>>)
  3983		  return {{__private_init, __extract_part<0, 3>(__data(__x))},
  3984			  {__private_init, __extract_part<1, 3, 2>(__data(__x))}};
  3985		else if constexpr (is_same_v<_SizeList<_Sizes...>,
  3986					     _SizeList<_Np / 2, _Np / 4, _Np / 4>>)
  3987		  return {{__private_init, __extract_part<0, 2>(__data(__x))},
  3988			  {__private_init, __extract_part<2, 4>(__data(__x))},
  3989			  {__private_init, __extract_part<3, 4>(__data(__x))}};
  3990		else if constexpr (is_same_v<_SizeList<_Sizes...>,
  3991					     _SizeList<_Np / 4, _Np / 4, _Np / 2>>)
  3992		  return {{__private_init, __extract_part<0, 4>(__data(__x))},
  3993			  {__private_init, __extract_part<1, 4>(__data(__x))},
  3994			  {__private_init, __extract_part<1, 2>(__data(__x))}};
  3995		else if constexpr (is_same_v<_SizeList<_Sizes...>,
  3996					     _SizeList<_Np / 4, _Np / 2, _Np / 4>>)
  3997		  return {{__private_init, __extract_part<0, 4>(__data(__x))},
  3998			  {__private_init, __extract_center(__data(__x))},
  3999			  {__private_init, __extract_part<3, 4>(__data(__x))}};
  4000		else if constexpr (((_Sizes * 4 == _Np) && ...))
  4001		  return {{__private_init, __extract_part<0, 4>(__data(__x))},
  4002			  {__private_init, __extract_part<1, 4>(__data(__x))},
  4003			  {__private_init, __extract_part<2, 4>(__data(__x))},
  4004			  {__private_init, __extract_part<3, 4>(__data(__x))}};
  4005		// else fall through
  4006	      }
      	    // Generic path. With aliasing loads each piece is loaded from the
      	    // object's memory at its element offset, with an alignment computed from
      	    // alignof(simd<_Tp, _Ap>) and the piece's byte offset. Without them each
      	    // piece is generated by subscripting the data with constant indices.
  4007	#ifdef _GLIBCXX_SIMD_USE_ALIASING_LOADS
  4008	    const __may_alias<_Tp>* const __element_ptr
  4009	      = reinterpret_cast<const __may_alias<_Tp>*>(&__x);
  4010	    return __generate_from_n_evaluations<sizeof...(_Sizes), _Tuple>(
  4011		     [&](auto __i) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
  4012		       using _Vi = __deduced_simd<_Tp, _SL::_S_at(__i)>;
  4013		       constexpr size_t __offset = _SL::_S_before(__i);
  4014		       constexpr size_t __base_align = alignof(simd<_Tp, _Ap>);
  4015		       constexpr size_t __a
  4016			 = __base_align - ((__offset * sizeof(_Tp)) % __base_align);
  4017		       constexpr size_t __b = ((__a - 1) & __a) ^ __a;
  4018		       constexpr size_t __alignment = __b == 0 ? __a : __b;
  4019		       return _Vi(__element_ptr + __offset, overaligned<__alignment>);
  4020		     });
  4021	#else
  4022	    return __generate_from_n_evaluations<sizeof...(_Sizes), _Tuple>(
  4023		     [&](auto __i) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
  4024		       using _Vi = __deduced_simd<_Tp, _SL::_S_at(__i)>;
  4025		       const auto& __xx = __data(__x);
  4026		       using _Offset = decltype(_SL::_S_before(__i));
  4027		       return _Vi([&](auto __j) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
  4028				constexpr _SizeConstant<_Offset::value + __j> __k;
  4029				return __xx[__k];
  4030			      });
  4031		     });
  4032	#endif
  4033	  }
  4034	
  4035	// }}}
  4036	
  4037	// __subscript_in_pack {{{
      	// Returns element _I of the conceptual concatenation of __x, __xs...:
      	// indices beyond the first simd are forwarded, reduced by its size, to the
      	// remaining arguments.
  4038	template <size_t _I, typename _Tp, typename _Ap, typename... _As>
  4039	  _GLIBCXX_SIMD_INTRINSIC constexpr _Tp
  4040	  __subscript_in_pack(const simd<_Tp, _Ap>& __x, const simd<_Tp, _As>&... __xs)
  4041	  {
  4042	    if constexpr (_I >= simd_size_v<_Tp, _Ap>)
  4043	      return __subscript_in_pack<_I - simd_size_v<_Tp, _Ap>>(__xs...);
  4044	    else
  4045	      return __x[_I];
  4046	  }
  4047	
  4048	// }}}
  4049	// __store_pack_of_simd {{{
      	// Copies the element bytes of each simd argument, in order, to consecutive
      	// locations starting at __mem.
  4050	template <typename _Tp, typename _A0, typename... _As>
  4051	  _GLIBCXX_SIMD_INTRINSIC void
  4052	  __store_pack_of_simd(char* __mem, const simd<_Tp, _A0>& __x0, const simd<_Tp, _As>&... __xs)
  4053	  {
  4054	    constexpr size_t __bytes_of_first = simd_size_v<_Tp, _A0> * sizeof(_Tp);
  4055	    __builtin_memcpy(__mem, &__data(__x0), __bytes_of_first);
  4056	    if constexpr (sizeof...(__xs) != 0)
  4057	      __store_pack_of_simd(__mem + __bytes_of_first, __xs...);
  4058	  }
  4059	
  4060	// }}}
  4061	// concat(simd...) {{{
      	// Concatenates the given simd objects into one simd whose size is the sum
      	// of the argument sizes (ABI chosen by simd_abi::deduce_t).
  4062	template <typename _Tp, typename... _As>
  4063	  inline _GLIBCXX_SIMD_CONSTEXPR
  4064	  simd<_Tp, simd_abi::deduce_t<_Tp, (simd_size_v<_Tp, _As> + ...)>>
  4065	  concat(const simd<_Tp, _As>&... __xs)
  4066	  {
  4067	    using _Rp = __deduced_simd<_Tp, (simd_size_v<_Tp, _As> + ...)>;
      	    // One argument: only a cast to the result type is needed.
  4068	    if constexpr (sizeof...(__xs) == 1)
  4069	      return simd_cast<_Rp>(__xs...);
      	    // Plain `if`: when _M_is_constprop() holds for every argument, build the
      	    // result element-wise via __subscript_in_pack.
  4070	    else if ((... && __xs._M_is_constprop()))
  4071	      return simd<_Tp,
  4072			  simd_abi::deduce_t<_Tp, (simd_size_v<_Tp, _As> + ...)>>(
  4073		       [&](auto __i) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA
  4074		       { return __subscript_in_pack<__i>(__xs...); });
      	    // Otherwise: copy the arguments' bytes back to back into the result.
  4075	    else
  4076	      {
  4077		_Rp __r{};
  4078		__store_pack_of_simd(reinterpret_cast<char*>(&__data(__r)), __xs...);
  4079		return __r;
  4080	      }
  4081	  }
  4082	
  4083	// }}}
  4084	// concat(array<simd>) {{{
      	// Concatenates all _Np elements of the array by passing them, through
      	// __call_with_subscripts, as individual arguments to concat(simd...).
  4085	template <typename _Tp, typename _Abi, size_t _Np>
  4086	  _GLIBCXX_SIMD_ALWAYS_INLINE
  4087	  _GLIBCXX_SIMD_CONSTEXPR __deduced_simd<_Tp, simd_size_v<_Tp, _Abi> * _Np>
  4088	  concat(const array<simd<_Tp, _Abi>, _Np>& __x)
  4089	  {
  4090	    return __call_with_subscripts<_Np>(
  4091		     __x, [](const auto&... __xs) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
  4092		       return concat(__xs...);
  4093		     });
  4094	  }
  4095	
  4096	// }}}
  4097	
  4098	/// @cond undocumented
  4099	// _SmartReference {{{
  4100	template <typename _Up, typename _Accessor = _Up,
  4101		  typename _ValueType = typename _Up::value_type>
  4102	  class _SmartReference
  4103	  {
  4104	    friend _Accessor;
  4105	    int _M_index;
  4106	    _Up& _M_obj;
  4107	
  4108	    _GLIBCXX_SIMD_INTRINSIC constexpr _ValueType
  4109	    _M_read() const noexcept
  4110	    {
  4111	      if constexpr (is_arithmetic_v<_Up>)
  4112		return _M_obj;
  4113	      else
  4114		return _M_obj[_M_index];
  4115	    }
  4116	
  4117	    template <typename _Tp>
  4118	      _GLIBCXX_SIMD_INTRINSIC constexpr void
  4119	      _M_write(_Tp&& __x) const
  4120	      { _Accessor::_S_set(_M_obj, _M_index, static_cast<_Tp&&>(__x)); }
  4121	
  4122	  public:
  4123	    _GLIBCXX_SIMD_INTRINSIC constexpr
  4124	    _SmartReference(_Up& __o, int __i) noexcept
  4125	    : _M_index(__i), _M_obj(__o) {}
  4126	
  4127	    using value_type = _ValueType;
  4128	
  4129	    _GLIBCXX_SIMD_INTRINSIC
  4130	    _SmartReference(const _SmartReference&) = delete;
  4131	
  4132	    _GLIBCXX_SIMD_INTRINSIC constexpr
  4133	    operator value_type() const noexcept
  4134	    { return _M_read(); }
  4135	
  4136	    template <typename _Tp, typename = _ValuePreservingOrInt<__remove_cvref_t<_Tp>, value_type>>
  4137	      _GLIBCXX_SIMD_INTRINSIC constexpr _SmartReference
  4138	      operator=(_Tp&& __x) &&
  4139	      {
  4140		_M_write(static_cast<_Tp&&>(__x));
  4141		return {_M_obj, _M_index};
  4142	      }
  4143	
  4144	#define _GLIBCXX_SIMD_OP_(__op)                                                   \
  4145	    template <typename _Tp,                                                       \
  4146		      typename _TT = decltype(declval<value_type>() __op declval<_Tp>()), \
  4147		      typename = _ValuePreservingOrInt<__remove_cvref_t<_Tp>, _TT>,       \
  4148		      typename = _ValuePreservingOrInt<_TT, value_type>>                  \
  4149	      _GLIBCXX_SIMD_INTRINSIC constexpr _SmartReference                           \
  4150	      operator __op##=(_Tp&& __x) &&                                              \
  4151	      {                                                                           \
  4152		const value_type& __lhs = _M_read();                                      \
  4153		_M_write(__lhs __op __x);                                                 \
  4154		return {_M_obj, _M_index};                                                \
  4155	      }
  4156	    _GLIBCXX_SIMD_ALL_ARITHMETICS(_GLIBCXX_SIMD_OP_);
  4157	    _GLIBCXX_SIMD_ALL_SHIFTS(_GLIBCXX_SIMD_OP_);
  4158	    _GLIBCXX_SIMD_ALL_BINARY(_GLIBCXX_SIMD_OP_);
  4159	#undef _GLIBCXX_SIMD_OP_
  4160	
  4161	    template <typename _Tp = void,
  4162		      typename = decltype(++declval<conditional_t<true, value_type, _Tp>&>())>
  4163	      _GLIBCXX_SIMD_INTRINSIC constexpr _SmartReference
  4164	      operator++() &&
  4165	      {
  4166		value_type __x = _M_read();
  4167		_M_write(++__x);
  4168		return {_M_obj, _M_index};
  4169	      }
  4170	
  4171	    template <typename _Tp = void,
  4172		      typename = decltype(declval<conditional_t<true, value_type, _Tp>&>()++)>
  4173	      _GLIBCXX_SIMD_INTRINSIC constexpr value_type
  4174	      operator++(int) &&
  4175	      {
  4176		const value_type __r = _M_read();
  4177		value_type __x = __r;
  4178		_M_write(++__x);
  4179		return __r;
  4180	      }
  4181	
  4182	    template <typename _Tp = void,
  4183		      typename = decltype(--declval<conditional_t<true, value_type, _Tp>&>())>
  4184	      _GLIBCXX_SIMD_INTRINSIC constexpr _SmartReference
  4185	      operator--() &&
  4186	      {
  4187		value_type __x = _M_read();
  4188		_M_write(--__x);
  4189		return {_M_obj, _M_index};
  4190	      }
  4191	
  4192	    template <typename _Tp = void,
  4193		      typename = decltype(declval<conditional_t<true, value_type, _Tp>&>()--)>
  4194	      _GLIBCXX_SIMD_INTRINSIC constexpr value_type
  4195	      operator--(int) &&
  4196	      {
  4197		const value_type __r = _M_read();
  4198		value_type __x = __r;
  4199		_M_write(--__x);
  4200		return __r;
  4201	      }
  4202	
  4203	    _GLIBCXX_SIMD_INTRINSIC friend void
  4204	    swap(_SmartReference&& __a, _SmartReference&& __b) noexcept(
  4205	      conjunction<
  4206		is_nothrow_constructible<value_type, _SmartReference&&>,
  4207		is_nothrow_assignable<_SmartReference&&, value_type&&>>::value)
  4208	    {
  4209	      value_type __tmp = static_cast<_SmartReference&&>(__a);
  4210	      static_cast<_SmartReference&&>(__a) = static_cast<value_type>(__b);
  4211	      static_cast<_SmartReference&&>(__b) = std::move(__tmp);
  4212	    }
  4213	
  4214	    _GLIBCXX_SIMD_INTRINSIC friend void
  4215	    swap(value_type& __a, _SmartReference&& __b) noexcept(
  4216	      conjunction<
  4217		is_nothrow_constructible<value_type, value_type&&>,
  4218		is_nothrow_assignable<value_type&, value_type&&>,
  4219		is_nothrow_assignable<_SmartReference&&, value_type&&>>::value)
  4220	    {
  4221	      value_type __tmp(std::move(__a));
  4222	      __a = static_cast<value_type>(__b);
  4223	      static_cast<_SmartReference&&>(__b) = std::move(__tmp);
  4224	    }
  4225	
  4226	    _GLIBCXX_SIMD_INTRINSIC friend void
  4227	    swap(_SmartReference&& __a, value_type& __b) noexcept(
  4228	      conjunction<
  4229		is_nothrow_constructible<value_type, _SmartReference&&>,
  4230		is_nothrow_assignable<value_type&, value_type&&>,
  4231		is_nothrow_assignable<_SmartReference&&, value_type&&>>::value)
  4232	    {
  4233	      value_type __tmp(__a);
  4234	      static_cast<_SmartReference&&>(__a) = std::move(__b);
  4235	      __b = std::move(__tmp);
  4236	    }
  4237	  };
  4238	
  4239	// }}}
  4240	// __scalar_abi_wrapper {{{
  4241	template <int _Bytes>
  4242	  struct __scalar_abi_wrapper
  4243	  {
  4244	    template <typename _Tp> static constexpr size_t _S_full_size = 1;
  4245	    template <typename _Tp> static constexpr size_t _S_size = 1;
  4246	    template <typename _Tp> static constexpr size_t _S_is_partial = false;
  4247	
  4248	    template <typename _Tp, typename _Abi = simd_abi::scalar>
  4249	      static constexpr bool _S_is_valid_v
  4250		= _Abi::template _IsValid<_Tp>::value && sizeof(_Tp) == _Bytes;
  4251	  };
  4252	
  4253	// }}}
  4254	// __decay_abi metafunction {{{
  4255	template <typename _Tp>
  4256	  struct __decay_abi { using type = _Tp; };
  4257	
  4258	template <int _Bytes>
  4259	  struct __decay_abi<__scalar_abi_wrapper<_Bytes>>
  4260	  { using type = simd_abi::scalar; };
  4261	
  4262	// }}}
  4263	// __find_next_valid_abi metafunction {{{1
  4264	// Given an ABI tag A<N>, find an N2 < N such that A<N2>::_S_is_valid_v<_Tp> ==
  4265	// true, N2 is a power-of-2, and A<N2>::_S_is_partial<_Tp> is false. Break
  4266	// recursion at 2 elements in the resulting ABI tag. In this case
  4267	// type::_S_is_valid_v<_Tp> may be false.
  4268	template <template <int> class _Abi, int _Bytes, typename _Tp>
  4269	  struct __find_next_valid_abi
  4270	  {
  4271	    static constexpr auto
  4272	    _S_choose()
  4273	    {
  4274	      constexpr int _NextBytes = std::__bit_ceil(_Bytes) / 2;
  4275	      using _NextAbi = _Abi<_NextBytes>;
  4276	      if constexpr (_NextBytes < sizeof(_Tp) * 2) // break recursion
  4277		return _Abi<_Bytes>();
  4278	      else if constexpr (_NextAbi::template _S_is_partial<_Tp> == false
  4279				 && _NextAbi::template _S_is_valid_v<_Tp>)
  4280		return _NextAbi();
  4281	      else
  4282		return __find_next_valid_abi<_Abi, _NextBytes, _Tp>::_S_choose();
  4283	    }
  4284	
  4285	    using type = decltype(_S_choose());
  4286	  };
  4287	
  4288	template <int _Bytes, typename _Tp>
  4289	  struct __find_next_valid_abi<__scalar_abi_wrapper, _Bytes, _Tp>
  4290	  { using type = simd_abi::scalar; };
  4291	
  4292	// _AbiList {{{1
// Primary template. It is selected for the empty list (the partial
// specialization below takes every non-empty list) and therefore terminates
// the recursion: no ABI is valid, and both alias templates yield void.
template <template <int> class...>
  struct _AbiList
  {
    template <typename, int> static constexpr bool _S_has_valid_abi = false;
    template <typename, int> using _FirstValidAbi = void;
    template <typename, int> using _BestAbi = void;
  };
  4300	
// Non-empty list: _A0 is the ABI template with the highest priority, _Rest...
// are tried only when _A0 does not qualify. All members take the element
// type _Tp and the requested number of elements _Np.
template <template <int> class _A0, template <int> class... _Rest>
  struct _AbiList<_A0, _Rest...>
  {
    // True iff _A0<sizeof(_Tp) * _Np> or any ABI in _Rest... is valid for _Tp.
    template <typename _Tp, int _Np>
      static constexpr bool _S_has_valid_abi
        = _A0<sizeof(_Tp) * _Np>::template _S_is_valid_v<
            _Tp> || _AbiList<_Rest...>::template _S_has_valid_abi<_Tp, _Np>;

    // The first ABI (in list order) that is valid for _Tp at exactly
    // sizeof(_Tp) * _Np bytes, passed through __decay_abi. Resolves to void
    // if the whole list is exhausted.
    template <typename _Tp, int _Np>
      using _FirstValidAbi = conditional_t<
        _A0<sizeof(_Tp) * _Np>::template _S_is_valid_v<_Tp>,
        typename __decay_abi<_A0<sizeof(_Tp) * _Np>>::type,
        typename _AbiList<_Rest...>::template _FirstValidAbi<_Tp, _Np>>;

    // Returns a value-initialized object of the preferred ABI tag for _Np
    // elements of _Tp; only the return type matters (see _BestAbi). Unlike
    // _FirstValidAbi, the result may hold fewer than _Np elements.
    template <typename _Tp, int _Np>
      static constexpr auto
      _S_determine_best_abi()
      {
        static_assert(_Np >= 1);
        constexpr int _Bytes = sizeof(_Tp) * _Np;
        // A single element always uses the scalar ABI.
        if constexpr (_Np == 1)
          return __make_dependent_t<_Tp, simd_abi::scalar>{};
        else
          {
            constexpr int __fullsize = _A0<_Bytes>::template _S_full_size<_Tp>;
            // _A0<_Bytes> is good if:
            // 1. The ABI tag is valid for _Tp
            // 2. The storage overhead is no more than padding to fill the next
            //    power-of-2 number of bytes
            if constexpr (_A0<_Bytes>::template _S_is_valid_v<
                            _Tp> && __fullsize / 2 < _Np)
              return typename __decay_abi<_A0<_Bytes>>::type{};
            else
              {
                // Otherwise look for a narrower _A0 tag and use it if it is
                // valid and does not hold more than _Np elements.
                using _Bp =
                  typename __find_next_valid_abi<_A0, _Bytes, _Tp>::type;
                if constexpr (_Bp::template _S_is_valid_v<
                                _Tp> && _Bp::template _S_size<_Tp> <= _Np)
                  return _Bp{};
                else
                  // _A0 cannot help: defer to the remaining ABIs in the list.
                  return
                    typename _AbiList<_Rest...>::template _BestAbi<_Tp, _Np>{};
              }
          }
      }

    // The ABI tag type chosen by _S_determine_best_abi.
    template <typename _Tp, int _Np>
      using _BestAbi = decltype(_S_determine_best_abi<_Tp, _Np>());
  };
  4350	
  4351	// }}}1
  4352	
// The following lists all native ABIs, which makes them accessible to
// simd_abi::deduce and select_best_vector_type_t (for fixed_size). Order
// matters: whatever comes first has higher priority. The scalar ABI takes
// part through __scalar_abi_wrapper and, being last, has the lowest priority.
using _AllNativeAbis = _AbiList<simd_abi::_VecBltnBtmsk, simd_abi::_VecBuiltin,
                                __scalar_abi_wrapper>;
  4358	
  4359	// valid _SimdTraits specialization {{{1
// Selected whenever `typename _Abi::template _IsValid<_Tp>` names a type (the
// void_t pattern). The traits are then inherited from the ABI's own
// __traits<_Tp> member template.
template <typename _Tp, typename _Abi>
  struct _SimdTraits<_Tp, _Abi, void_t<typename _Abi::template _IsValid<_Tp>>>
  : _Abi::template __traits<_Tp> {};
  4363	
  4364	// __deduce_impl specializations {{{1
// try all native ABIs (including scalar) first
// This specialization is enabled when some entry of _AllNativeAbis is valid
// for _Np elements of _Tp; `type` is then the first such entry in list order.
template <typename _Tp, size_t _Np>
  struct __deduce_impl<
    _Tp, _Np, enable_if_t<_AllNativeAbis::template _S_has_valid_abi<_Tp, _Np>>>
  { using type = _AllNativeAbis::_FirstValidAbi<_Tp, _Np>; };

// fall back to fixed_size only if scalar and native ABIs don't match
// Primary template: no member `type`, i.e. the deduction fails.
template <typename _Tp, size_t _Np, typename = void>
  struct __deduce_fixed_size_fallback {};

// Enabled when simd_abi::fixed_size<_Np> is valid for _Tp.
template <typename _Tp, size_t _Np>
  struct __deduce_fixed_size_fallback<_Tp, _Np,
    enable_if_t<simd_abi::fixed_size<_Np>::template _S_is_valid_v<_Tp>>>
  { using type = simd_abi::fixed_size<_Np>; };

// Definition of the __deduce_impl primary template: used when the native
// specialization above is disabled; it inherits `type` (if any) from the
// fixed_size fallback.
template <typename _Tp, size_t _Np, typename>
  struct __deduce_impl : public __deduce_fixed_size_fallback<_Tp, _Np> {};
  4382	
  4383	//}}}1
  4384	/// @endcond
  4385	
  4386	// simd_mask {{{
  4387	template <typename _Tp, typename _Abi>
  4388	  class simd_mask : public _SimdTraits<_Tp, _Abi>::_MaskBase
  4389	  {
  4390	    // types, tags, and friends {{{
  4391	    using _Traits = _SimdTraits<_Tp, _Abi>;
  4392	    using _MemberType = typename _Traits::_MaskMember;
  4393	
  4394	    // We map all masks with equal element sizeof to a single integer type, the
  4395	    // one given by __int_for_sizeof_t<_Tp>. This is the approach
  4396	    // [[gnu::vector_size(N)]] types take as well and it reduces the number of
  4397	    // template specializations in the implementation classes.
  4398	    using _Ip = __int_for_sizeof_t<_Tp>;
  4399	    static constexpr _Ip* _S_type_tag = nullptr;
  4400	
  4401	    friend typename _Traits::_MaskBase;
  4402	    friend class simd<_Tp, _Abi>;       // to construct masks on return
  4403	    friend typename _Traits::_SimdImpl; // to construct masks on return and
  4404						// inspect data on masked operations
  4405	  public:
  4406	    using _Impl = typename _Traits::_MaskImpl;
  4407	    friend _Impl;
  4408	
  4409	    // }}}
  4410	    // member types {{{
  4411	    using value_type = bool;
  4412	    using reference = _SmartReference<_MemberType, _Impl, value_type>;
  4413	    using simd_type = simd<_Tp, _Abi>;
  4414	    using abi_type = _Abi;
  4415	
  4416	    // }}}
  4417	    static constexpr size_t size() // {{{
  4418	    { return __size_or_zero_v<_Tp, _Abi>; }
  4419	
  4420	    // }}}
  4421	    // constructors & assignment {{{
  4422	    simd_mask() = default;
  4423	    simd_mask(const simd_mask&) = default;
  4424	    simd_mask(simd_mask&&) = default;
  4425	    simd_mask& operator=(const simd_mask&) = default;
  4426	    simd_mask& operator=(simd_mask&&) = default;
  4427	
  4428	    // }}}
  4429	    // access to internal representation (optional feature) {{{
  4430	    _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR explicit
  4431	    simd_mask(typename _Traits::_MaskCastType __init)
  4432	    : _M_data{__init} {}
  4433	    // conversions to internal type is done in _MaskBase
  4434	
  4435	    // }}}
  4436	    // bitset interface (extension to be proposed) {{{
  4437	    // TS_FEEDBACK:
  4438	    // Conversion of simd_mask to and from bitset makes it much easier to
  4439	    // interface with other facilities. I suggest adding `static
  4440	    // simd_mask::from_bitset` and `simd_mask::to_bitset`.
  4441	    _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR static simd_mask
  4442	    __from_bitset(bitset<size()> bs)
  4443	    { return {__bitset_init, bs}; }
  4444	
  4445	    _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR bitset<size()>
  4446	    __to_bitset() const
  4447	    { return _Impl::_S_to_bits(_M_data)._M_to_bitset(); }
  4448	
  4449	    // }}}
  4450	    // explicit broadcast constructor {{{
  4451	    _GLIBCXX_SIMD_ALWAYS_INLINE explicit _GLIBCXX_SIMD_CONSTEXPR
  4452	    simd_mask(value_type __x)
  4453	    : _M_data(_Impl::template _S_broadcast<_Ip>(__x)) {}
  4454	
  4455	    // }}}
  4456	    // implicit type conversion constructor {{{
  4457	  #ifdef _GLIBCXX_SIMD_ENABLE_IMPLICIT_MASK_CAST
  4458	    // proposed improvement
  4459	    template <typename _Up, typename _A2,
  4460		      typename = enable_if_t<simd_size_v<_Up, _A2> == size()>>
  4461	      _GLIBCXX_SIMD_ALWAYS_INLINE explicit(sizeof(_MemberType)
  4462		  != sizeof(typename _SimdTraits<_Up, _A2>::_MaskMember))
  4463	      simd_mask(const simd_mask<_Up, _A2>& __x)
  4464	      : simd_mask(__proposed::static_simd_cast<simd_mask>(__x)) {}
  4465	  #else
  4466	    // conforming to ISO/IEC 19570:2018
  4467	    template <typename _Up, typename = enable_if_t<conjunction<
  4468				      is_same<abi_type, simd_abi::fixed_size<size()>>,
  4469				      is_same<_Up, _Up>>::value>>
  4470	      _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR
  4471	      simd_mask(const simd_mask<_Up, simd_abi::fixed_size<size()>>& __x)
  4472	      : _M_data(_Impl::_S_from_bitmask(__data(__x), _S_type_tag)) {}
  4473	  #endif
  4474	
  4475	    // }}}
  4476	    // load constructor {{{
  4477	    template <typename _Flags>
  4478	      _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR
  4479	      simd_mask(const value_type* __mem, _IsSimdFlagType<_Flags>)
  4480	      : _M_data(_Impl::template _S_load<_Ip>(_Flags::template _S_apply<simd_mask>(__mem))) {}
  4481	
  4482	    template <typename _Flags>
  4483	      _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR
  4484	      simd_mask(const value_type* __mem, simd_mask __k, _IsSimdFlagType<_Flags>)
  4485	      : _M_data{}
  4486	      {
  4487		_M_data = _Impl::_S_masked_load(_M_data, __k._M_data,
  4488						_Flags::template _S_apply<simd_mask>(__mem));
  4489	      }
  4490	
  4491	    // }}}
  4492	    // loads [simd_mask.load] {{{
  4493	    template <typename _Flags>
  4494	      _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR void
  4495	      copy_from(const value_type* __mem, _IsSimdFlagType<_Flags>)
  4496	      { _M_data = _Impl::template _S_load<_Ip>(_Flags::template _S_apply<simd_mask>(__mem)); }
  4497	
  4498	    // }}}
  4499	    // stores [simd_mask.store] {{{
  4500	    template <typename _Flags>
  4501	      _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR void
  4502	      copy_to(value_type* __mem, _IsSimdFlagType<_Flags>) const
  4503	      { _Impl::_S_store(_M_data, _Flags::template _S_apply<simd_mask>(__mem)); }
  4504	
  4505	    // }}}
  4506	    // scalar access {{{
  4507	    _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR reference
  4508	    operator[](size_t __i)
  4509	    {
  4510	      if (__i >= size())
  4511		__invoke_ub("Subscript %d is out of range [0, %d]", __i, size() - 1);
  4512	      return {_M_data, int(__i)};
  4513	    }
  4514	
  4515	    _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR value_type
  4516	    operator[](size_t __i) const
  4517	    {
  4518	      if (__i >= size())
  4519		__invoke_ub("Subscript %d is out of range [0, %d]", __i, size() - 1);
  4520	      if constexpr (__is_scalar_abi<_Abi>())
  4521		return _M_data;
  4522	      else
  4523		return static_cast<bool>(_M_data[__i]);
  4524	    }
  4525	
  4526	    // }}}
  4527	    // negation {{{
  4528	    _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR simd_mask
  4529	    operator!() const
  4530	    { return {__private_init, _Impl::_S_bit_not(_M_data)}; }
  4531	
  4532	    // }}}
  4533	    // simd_mask binary operators [simd_mask.binary] {{{
  4534	  #ifdef _GLIBCXX_SIMD_ENABLE_IMPLICIT_MASK_CAST
  4535	    // simd_mask<int> && simd_mask<uint> needs disambiguation
  4536	    template <typename _Up, typename _A2,
  4537		      typename = enable_if_t<is_convertible_v<simd_mask<_Up, _A2>, simd_mask>>>
  4538	      _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend simd_mask
  4539	      operator&&(const simd_mask& __x, const simd_mask<_Up, _A2>& __y)
  4540	      {
  4541		return {__private_init,
  4542			_Impl::_S_logical_and(__x._M_data, simd_mask(__y)._M_data)};
  4543	      }
  4544	
  4545	    template <typename _Up, typename _A2,
  4546		      typename = enable_if_t<is_convertible_v<simd_mask<_Up, _A2>, simd_mask>>>
  4547	      _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend simd_mask
  4548	      operator||(const simd_mask& __x, const simd_mask<_Up, _A2>& __y)
  4549	      {
  4550		return {__private_init,
  4551			_Impl::_S_logical_or(__x._M_data, simd_mask(__y)._M_data)};
  4552	      }
  4553	  #endif // _GLIBCXX_SIMD_ENABLE_IMPLICIT_MASK_CAST
  4554	
  4555	    _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend simd_mask
  4556	    operator&&(const simd_mask& __x, const simd_mask& __y)
  4557	    { return {__private_init, _Impl::_S_logical_and(__x._M_data, __y._M_data)}; }
  4558	
  4559	    _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend simd_mask
  4560	    operator||(const simd_mask& __x, const simd_mask& __y)
  4561	    { return {__private_init, _Impl::_S_logical_or(__x._M_data, __y._M_data)}; }
  4562	
  4563	    _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend simd_mask
  4564	    operator&(const simd_mask& __x, const simd_mask& __y)
  4565	    { return {__private_init, _Impl::_S_bit_and(__x._M_data, __y._M_data)}; }
  4566	
  4567	    _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend simd_mask
  4568	    operator|(const simd_mask& __x, const simd_mask& __y)
  4569	    { return {__private_init, _Impl::_S_bit_or(__x._M_data, __y._M_data)}; }
  4570	
  4571	    _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend simd_mask
  4572	    operator^(const simd_mask& __x, const simd_mask& __y)
  4573	    { return {__private_init, _Impl::_S_bit_xor(__x._M_data, __y._M_data)}; }
  4574	
  4575	    _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend simd_mask&
  4576	    operator&=(simd_mask& __x, const simd_mask& __y)
  4577	    {
  4578	      __x._M_data = _Impl::_S_bit_and(__x._M_data, __y._M_data);
  4579	      return __x;
  4580	    }
  4581	
  4582	    _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend simd_mask&
  4583	    operator|=(simd_mask& __x, const simd_mask& __y)
  4584	    {
  4585	      __x._M_data = _Impl::_S_bit_or(__x._M_data, __y._M_data);
  4586	      return __x;
  4587	    }
  4588	
  4589	    _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend simd_mask&
  4590	    operator^=(simd_mask& __x, const simd_mask& __y)
  4591	    {
  4592	      __x._M_data = _Impl::_S_bit_xor(__x._M_data, __y._M_data);
  4593	      return __x;
  4594	    }
  4595	
  4596	    // }}}
  4597	    // simd_mask compares [simd_mask.comparison] {{{
  4598	    _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend simd_mask
  4599	    operator==(const simd_mask& __x, const simd_mask& __y)
  4600	    { return !operator!=(__x, __y); }
  4601	
  4602	    _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend simd_mask
  4603	    operator!=(const simd_mask& __x, const simd_mask& __y)
  4604	    { return {__private_init, _Impl::_S_bit_xor(__x._M_data, __y._M_data)}; }
  4605	
  4606	    // }}}
  4607	    // private_init ctor {{{
  4608	    _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR
  4609	    simd_mask(_PrivateInit, typename _Traits::_MaskMember __init)
  4610	    : _M_data(__init) {}
  4611	
  4612	    // }}}
  4613	    // private_init generator ctor {{{
  4614	    template <typename _Fp, typename = decltype(bool(declval<_Fp>()(size_t())))>
  4615	      _GLIBCXX_SIMD_INTRINSIC constexpr
  4616	      simd_mask(_PrivateInit, _Fp&& __gen)
  4617	      : _M_data()
  4618	      {
  4619		__execute_n_times<size()>([&](auto __i) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
  4620		  _Impl::_S_set(_M_data, __i, __gen(__i));
  4621		});
  4622	      }
  4623	
  4624	    // }}}
  4625	    // bitset_init ctor {{{
  4626	    _GLIBCXX_SIMD_INTRINSIC constexpr
  4627	    simd_mask(_BitsetInit, bitset<size()> __init)
  4628	    : _M_data(_Impl::_S_from_bitmask(_SanitizedBitMask<size()>(__init), _S_type_tag))
  4629	    {}
  4630	
  4631	    // }}}
  4632	    // __cvt {{{
  4633	    // TS_FEEDBACK:
  4634	    // The conversion operator this implements should be a ctor on simd_mask.
  4635	    // Once you call .__cvt() on a simd_mask it converts conveniently.
  4636	    // A useful variation: add `explicit(sizeof(_Tp) != sizeof(_Up))`
  4637	    struct _CvtProxy
  4638	    {
  4639	      template <typename _Up, typename _A2,
  4640			typename = enable_if_t<simd_size_v<_Up, _A2> == simd_size_v<_Tp, _Abi>>>
  4641		operator simd_mask<_Up, _A2>() &&
  4642		{
  4643		  using namespace std::experimental::__proposed;
  4644		  return static_simd_cast<simd_mask<_Up, _A2>>(_M_data);
  4645		}
  4646	
  4647	      const simd_mask<_Tp, _Abi>& _M_data;
  4648	    };
  4649	
  4650	    _GLIBCXX_SIMD_INTRINSIC _CvtProxy
  4651	    __cvt() const
  4652	    { return {*this}; }
  4653	
  4654	    // }}}
  4655	    // operator?: overloads (suggested extension) {{{
  4656	  #ifdef __GXX_CONDITIONAL_IS_OVERLOADABLE__
  4657	    _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend simd_mask
  4658	    operator?:(const simd_mask& __k, const simd_mask& __where_true,
  4659		       const simd_mask& __where_false)
  4660	    {
  4661	      auto __ret = __where_false;
  4662	      _Impl::_S_masked_assign(__k._M_data, __ret._M_data, __where_true._M_data);
  4663	      return __ret;
  4664	    }
  4665	
  4666	    template <typename _U1, typename _U2,
  4667		      typename _Rp = simd<common_type_t<_U1, _U2>, _Abi>,
  4668		      typename = enable_if_t<conjunction_v<
  4669			is_convertible<_U1, _Rp>, is_convertible<_U2, _Rp>,
  4670			is_convertible<simd_mask, typename _Rp::mask_type>>>>
  4671	      _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend _Rp
  4672	      operator?:(const simd_mask& __k, const _U1& __where_true,
  4673			 const _U2& __where_false)
  4674	      {
  4675		_Rp __ret = __where_false;
  4676		_Rp::_Impl::_S_masked_assign(
  4677		  __data(static_cast<typename _Rp::mask_type>(__k)), __data(__ret),
  4678		  __data(static_cast<_Rp>(__where_true)));
  4679		return __ret;
  4680	      }
  4681	
  4682	  #ifdef _GLIBCXX_SIMD_ENABLE_IMPLICIT_MASK_CAST
  4683	    template <typename _Kp, typename _Ak, typename _Up, typename _Au,
  4684		      typename = enable_if_t<
  4685			conjunction_v<is_convertible<simd_mask<_Kp, _Ak>, simd_mask>,
  4686				      is_convertible<simd_mask<_Up, _Au>, simd_mask>>>>
  4687	      _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend simd_mask
  4688	      operator?:(const simd_mask<_Kp, _Ak>& __k, const simd_mask& __where_true,
  4689			 const simd_mask<_Up, _Au>& __where_false)
  4690	      {
  4691		simd_mask __ret = __where_false;
  4692		_Impl::_S_masked_assign(simd_mask(__k)._M_data, __ret._M_data,
  4693					__where_true._M_data);
  4694		return __ret;
  4695	      }
  4696	  #endif // _GLIBCXX_SIMD_ENABLE_IMPLICIT_MASK_CAST
  4697	  #endif // __GXX_CONDITIONAL_IS_OVERLOADABLE__
  4698	
  4699	    // }}}
  4700	    // _M_is_constprop {{{
  4701	    _GLIBCXX_SIMD_INTRINSIC constexpr bool
  4702	    _M_is_constprop() const
  4703	    {
  4704	      if constexpr (__is_scalar_abi<_Abi>())
  4705		return __builtin_constant_p(_M_data);
  4706	      else
  4707		return _M_data._M_is_constprop();
  4708	    }
  4709	
  4710	    // }}}
  4711	
  4712	  private:
  4713	    friend const auto& __data<_Tp, abi_type>(const simd_mask&);
  4714	    friend auto& __data<_Tp, abi_type>(simd_mask&);
  4715	    alignas(_Traits::_S_mask_align) _MemberType _M_data;
  4716	  };
  4717	
  4718	// }}}
  4719	
  4720	/// @cond undocumented
  4721	// __data(simd_mask) {{{
  4722	template <typename _Tp, typename _Ap>
  4723	  _GLIBCXX_SIMD_INTRINSIC constexpr const auto&
  4724	  __data(const simd_mask<_Tp, _Ap>& __x)
  4725	  { return __x._M_data; }
  4726	
  4727	template <typename _Tp, typename _Ap>
  4728	  _GLIBCXX_SIMD_INTRINSIC constexpr auto&
  4729	  __data(simd_mask<_Tp, _Ap>& __x)
  4730	  { return __x._M_data; }
  4731	
  4732	// }}}
  4733	/// @endcond
  4734	
  4735	// simd_mask reductions [simd_mask.reductions] {{{
  4736	template <typename _Tp, typename _Abi>
  4737	  _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR bool
  4738	  all_of(const simd_mask<_Tp, _Abi>& __k) noexcept
  4739	  {
  4740	    if (__builtin_is_constant_evaluated() || __k._M_is_constprop())
  4741	      {
  4742		for (size_t __i = 0; __i < simd_size_v<_Tp, _Abi>; ++__i)
  4743		  if (!__k[__i])
  4744		    return false;
  4745		return true;
  4746	      }
  4747	    else
  4748	      return _Abi::_MaskImpl::_S_all_of(__k);
  4749	  }
  4750	
  4751	template <typename _Tp, typename _Abi>
  4752	  _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR bool
  4753	  any_of(const simd_mask<_Tp, _Abi>& __k) noexcept
  4754	  {
  4755	    if (__builtin_is_constant_evaluated() || __k._M_is_constprop())
  4756	      {
  4757		for (size_t __i = 0; __i < simd_size_v<_Tp, _Abi>; ++__i)
  4758		  if (__k[__i])
  4759		    return true;
  4760		return false;
  4761	      }
  4762	    else
  4763	      return _Abi::_MaskImpl::_S_any_of(__k);
  4764	  }
  4765	
  4766	template <typename _Tp, typename _Abi>
  4767	  _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR bool
  4768	  none_of(const simd_mask<_Tp, _Abi>& __k) noexcept
  4769	  {
  4770	    if (__builtin_is_constant_evaluated() || __k._M_is_constprop())
  4771	      {
  4772		for (size_t __i = 0; __i < simd_size_v<_Tp, _Abi>; ++__i)
  4773		  if (__k[__i])
  4774		    return false;
  4775		return true;
  4776	      }
  4777	    else
  4778	      return _Abi::_MaskImpl::_S_none_of(__k);
  4779	  }
  4780	
  4781	template <typename _Tp, typename _Abi>
  4782	  _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR bool
  4783	  some_of(const simd_mask<_Tp, _Abi>& __k) noexcept
  4784	  {
  4785	    if (__builtin_is_constant_evaluated() || __k._M_is_constprop())
  4786	      {
  4787		for (size_t __i = 1; __i < simd_size_v<_Tp, _Abi>; ++__i)
  4788		  if (__k[__i] != __k[__i - 1])
  4789		    return true;
  4790		return false;
  4791	      }
  4792	    else
  4793	      return _Abi::_MaskImpl::_S_some_of(__k);
  4794	  }
  4795	
  4796	template <typename _Tp, typename _Abi>
  4797	  _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR int
  4798	  popcount(const simd_mask<_Tp, _Abi>& __k) noexcept
  4799	  {
  4800	    if (__builtin_is_constant_evaluated() || __k._M_is_constprop())
  4801	      {
  4802		const int __r = __call_with_subscripts<simd_size_v<_Tp, _Abi>>(
  4803				  __k, [](auto... __elements) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
  4804				    return ((__elements != 0) + ...);
  4805				  });
  4806		if (__builtin_is_constant_evaluated() || __builtin_constant_p(__r))
  4807		  return __r;
  4808	      }
  4809	    return _Abi::_MaskImpl::_S_popcount(__k);
  4810	  }
  4811	
  4812	template <typename _Tp, typename _Abi>
  4813	  _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR int
  4814	  find_first_set(const simd_mask<_Tp, _Abi>& __k)
  4815	  {
  4816	    if (__builtin_is_constant_evaluated() || __k._M_is_constprop())
  4817	      {
  4818		constexpr size_t _Np = simd_size_v<_Tp, _Abi>;
  4819		const size_t _Idx = __call_with_n_evaluations<_Np>(
  4820				      [](auto... __indexes) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
  4821					return std::min({__indexes...});
  4822				      }, [&](auto __i) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
  4823					return __k[__i] ? +__i : _Np;
  4824				      });
  4825		if (_Idx >= _Np)
  4826		  __invoke_ub("find_first_set(empty mask) is UB");
  4827		if (__builtin_constant_p(_Idx))
  4828		  return _Idx;
  4829	      }
  4830	    return _Abi::_MaskImpl::_S_find_first_set(__k);
  4831	  }
  4832	
  4833	template <typename _Tp, typename _Abi>
  4834	  _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR int
  4835	  find_last_set(const simd_mask<_Tp, _Abi>& __k)
  4836	  {
  4837	    if (__builtin_is_constant_evaluated() || __k._M_is_constprop())
  4838	      {
  4839		constexpr size_t _Np = simd_size_v<_Tp, _Abi>;
  4840		const int _Idx = __call_with_n_evaluations<_Np>(
  4841				   [](auto... __indexes) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
  4842				     return std::max({__indexes...});
  4843				   }, [&](auto __i) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
  4844				     return __k[__i] ? int(__i) : -1;
  4845				   });
  4846		if (_Idx < 0)
  4847		  __invoke_ub("find_first_set(empty mask) is UB");
  4848		if (__builtin_constant_p(_Idx))
  4849		  return _Idx;
  4850	      }
  4851	    return _Abi::_MaskImpl::_S_find_last_set(__k);
  4852	  }
  4853	
  4854	_GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR bool
  4855	all_of(_ExactBool __x) noexcept
  4856	{ return __x; }
  4857	
  4858	_GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR bool
  4859	any_of(_ExactBool __x) noexcept
  4860	{ return __x; }
  4861	
  4862	_GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR bool
  4863	none_of(_ExactBool __x) noexcept
  4864	{ return !__x; }
  4865	
  4866	_GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR bool
  4867	some_of(_ExactBool) noexcept
  4868	{ return false; }
  4869	
  4870	_GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR int
  4871	popcount(_ExactBool __x) noexcept
  4872	{ return __x; }
  4873	
  4874	_GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR int
  4875	find_first_set(_ExactBool)
  4876	{ return 0; }
  4877	
  4878	_GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR int
  4879	find_last_set(_ExactBool)
  4880	{ return 0; }
  4881	
  4882	// }}}
  4883	
  4884	/// @cond undocumented
  4885	// _SimdIntOperators{{{1
// Primary template: an empty class that adds no operators. The partial
// specialization for `true` below supplies %, &, |, ^, << and >>.
template <typename _V, typename _Impl, bool>
  class _SimdIntOperators {};
  4888	
  4889	template <typename _V, typename _Impl>
  4890	  class _SimdIntOperators<_V, _Impl, true>
  4891	  {
  4892	    _GLIBCXX_SIMD_INTRINSIC constexpr const _V&
  4893	    __derived() const
  4894	    { return *static_cast<const _V*>(this); }
  4895	
  4896	    template <typename _Tp>
  4897	      _GLIBCXX_SIMD_INTRINSIC static _GLIBCXX_SIMD_CONSTEXPR _V
  4898	      _S_make_derived(_Tp&& __d)
  4899	      { return {__private_init, static_cast<_Tp&&>(__d)}; }
  4900	
  4901	  public:
  4902	    _GLIBCXX_SIMD_CONSTEXPR friend _V& operator%=(_V& __lhs, const _V& __x)
  4903	    { return __lhs = __lhs % __x; }
  4904	
  4905	    _GLIBCXX_SIMD_CONSTEXPR friend _V& operator&=(_V& __lhs, const _V& __x)
  4906	    { return __lhs = __lhs & __x; }
  4907	
  4908	    _GLIBCXX_SIMD_CONSTEXPR friend _V& operator|=(_V& __lhs, const _V& __x)
  4909	    { return __lhs = __lhs | __x; }
  4910	
  4911	    _GLIBCXX_SIMD_CONSTEXPR friend _V& operator^=(_V& __lhs, const _V& __x)
  4912	    { return __lhs = __lhs ^ __x; }
  4913	
  4914	    _GLIBCXX_SIMD_CONSTEXPR friend _V& operator<<=(_V& __lhs, const _V& __x)
  4915	    { return __lhs = __lhs << __x; }
  4916	
  4917	    _GLIBCXX_SIMD_CONSTEXPR friend _V& operator>>=(_V& __lhs, const _V& __x)
  4918	    { return __lhs = __lhs >> __x; }
  4919	
  4920	    _GLIBCXX_SIMD_CONSTEXPR friend _V& operator<<=(_V& __lhs, int __x)
  4921	    { return __lhs = __lhs << __x; }
  4922	
  4923	    _GLIBCXX_SIMD_CONSTEXPR friend _V& operator>>=(_V& __lhs, int __x)
  4924	    { return __lhs = __lhs >> __x; }
  4925	
  4926	    _GLIBCXX_SIMD_CONSTEXPR friend _V operator%(const _V& __x, const _V& __y)
  4927	    {
  4928	      return _SimdIntOperators::_S_make_derived(
  4929		_Impl::_S_modulus(__data(__x), __data(__y)));
  4930	    }
  4931	
  4932	    _GLIBCXX_SIMD_CONSTEXPR friend _V operator&(const _V& __x, const _V& __y)
  4933	    {
  4934	      return _SimdIntOperators::_S_make_derived(
  4935		_Impl::_S_bit_and(__data(__x), __data(__y)));
  4936	    }
  4937	
  4938	    _GLIBCXX_SIMD_CONSTEXPR friend _V operator|(const _V& __x, const _V& __y)
  4939	    {
  4940	      return _SimdIntOperators::_S_make_derived(
  4941		_Impl::_S_bit_or(__data(__x), __data(__y)));
  4942	    }
  4943	
  4944	    _GLIBCXX_SIMD_CONSTEXPR friend _V operator^(const _V& __x, const _V& __y)
  4945	    {
  4946	      return _SimdIntOperators::_S_make_derived(
  4947		_Impl::_S_bit_xor(__data(__x), __data(__y)));
  4948	    }
  4949	
  4950	    _GLIBCXX_SIMD_CONSTEXPR friend _V operator<<(const _V& __x, const _V& __y)
  4951	    {
      	      // Left shift where the shift counts come from a second _V: both
      	      // operands' __data() go to _Impl::_S_bit_shift_left and the result is
      	      // wrapped through _S_make_derived. Unlike the (const _V&, int) overload,
      	      // no __invoke_ub range check is performed in this function.
  4952	      return _SimdIntOperators::_S_make_derived(
  4953		_Impl::_S_bit_shift_left(__data(__x), __data(__y)));
  4954	    }
  4955	
  4956	    _GLIBCXX_SIMD_CONSTEXPR friend _V operator>>(const _V& __x, const _V& __y)
  4957	    {
      	      // Right shift where the shift counts come from a second _V: both
      	      // operands' __data() go to _Impl::_S_bit_shift_right and the result is
      	      // wrapped through _S_make_derived. Unlike the (const _V&, int) overload,
      	      // no __invoke_ub range check is performed in this function.
  4958	      return _SimdIntOperators::_S_make_derived(
  4959		_Impl::_S_bit_shift_right(__data(__x), __data(__y)));
  4960	    }
  4961	
  4962	    template <typename _VV = _V>
  4963	      _GLIBCXX_SIMD_CONSTEXPR friend _V operator<<(const _V& __x, int __y)
  4964	      {
      		// Left shift of the whole object by one scalar count. Both
      		// [expr.shift] preconditions on the count (non-negative, smaller than
      		// the width of the promoted left operand) are reported through
      		// __invoke_ub before the shift is handed to _Impl.
  4965		using _ElemT = typename _VV::value_type;
      		// Width in bits of the type that `element << int` yields.
      		constexpr size_t __promoted_bits
      		  = sizeof(declval<_ElemT>() << __y) * __CHAR_BIT__;
  4966		if (__y < 0)
  4967		  __invoke_ub("The behavior is undefined if the right operand of a "
  4968			      "shift operation is negative. [expr.shift]\nA shift by "
  4969			      "%d was requested",
  4970			      __y);
  4971		if (!(size_t(__y) < __promoted_bits))
  4972		  __invoke_ub(
  4973		    "The behavior is undefined if the right operand of a "
  4974		    "shift operation is greater than or equal to the width of the "
  4975		    "promoted left operand. [expr.shift]\nA shift by %d was requested",
  4976		    __y);
  4977		return _SimdIntOperators::_S_make_derived(
  4978		  _Impl::_S_bit_shift_left(__data(__x), __y));
  4979	      }
  4980	
  4981	    template <typename _VV = _V>
  4982	      _GLIBCXX_SIMD_CONSTEXPR friend _V operator>>(const _V& __x, int __y)
  4983	      {
      		// Right shift of the whole object by one scalar count. Both
      		// [expr.shift] preconditions on the count (non-negative, smaller than
      		// the width of the promoted left operand) are reported through
      		// __invoke_ub before the shift is handed to _Impl.
  4984		using _ElemT = typename _VV::value_type;
      		// Width in bits of the type that `element << int` yields; the
      		// original check also measures a left-shift expression here.
      		constexpr size_t __promoted_bits
      		  = sizeof(declval<_ElemT>() << __y) * __CHAR_BIT__;
  4985		if (__y < 0)
  4986		  __invoke_ub(
  4987		    "The behavior is undefined if the right operand of a shift "
  4988		    "operation is negative. [expr.shift]\nA shift by %d was requested",
  4989		    __y);
  4990		if (!(size_t(__y) < __promoted_bits))
  4991		  __invoke_ub(
  4992		    "The behavior is undefined if the right operand of a shift "
  4993		    "operation is greater than or equal to the width of the promoted "
  4994		    "left operand. [expr.shift]\nA shift by %d was requested",
  4995		    __y);
  4996		return _SimdIntOperators::_S_make_derived(
  4997		  _Impl::_S_bit_shift_right(__data(__x), __y));
  4998	      }
  4999	
  5000	    // unary operators (for integral _Tp)
  5001	    _GLIBCXX_SIMD_CONSTEXPR _V operator~() const
      	    // Bitwise complement: passes the _M_data of the object returned by
      	    // __derived() to _Impl::_S_complement and list-initializes the returned _V
      	    // from the __private_init tag plus that result.
  5002	    { return {__private_init, _Impl::_S_complement(__derived()._M_data)}; }
  5003	  };
  5004	
  5005	//}}}1
  5006	/// @endcond
  5007	
  5008	// simd {{{
  5009	template <typename _Tp, typename _Abi>
  5010	  class simd : public _SimdIntOperators<
  5011			 simd<_Tp, _Abi>, typename _SimdTraits<_Tp, _Abi>::_SimdImpl,
  5012			 conjunction<is_integral<_Tp>,
  5013				     typename _SimdTraits<_Tp, _Abi>::_IsValid>::value>,
  5014		       public _SimdTraits<_Tp, _Abi>::_SimdBase
  5015	  {
      	    // Data-parallel value type with element type _Tp and ABI tag _Abi.
      	    //
      	    // The object stores a single _Traits::_SimdMember (_M_data); every
      	    // operation below forwards to a static function of _Traits::_SimdImpl.
      	    // The third template argument of the _SimdIntOperators base is true only
      	    // when _Tp is integral and _Traits::_IsValid holds.
  5016	    using _Traits = _SimdTraits<_Tp, _Abi>;
  5017	    using _MemberType = typename _Traits::_SimdMember;
  5018	    using _CastType = typename _Traits::_SimdCastType;
      	    // Null _Tp* that is passed as a tag argument to _Impl::_S_generator,
      	    // _Impl::_S_load and _Impl::_S_store; it is never dereferenced here.
  5019	    static constexpr _Tp* _S_type_tag = nullptr;
  5020	    friend typename _Traits::_SimdBase;
  5021	
  5022	  public:
  5023	    using _Impl = typename _Traits::_SimdImpl;
  5024	    friend _Impl;
  5025	    friend _SimdIntOperators<simd, _Impl, true>;
  5026	
  5027	    using value_type = _Tp;
      	    // Proxy type returned by the non-const operator[].
  5028	    using reference = _SmartReference<_MemberType, _Impl, value_type>;
  5029	    using mask_type = simd_mask<_Tp, _Abi>;
  5030	    using abi_type = _Abi;
  5031	
      	    // Number of elements, taken from __size_or_zero_v<_Tp, _Abi>.
  5032	    static constexpr size_t size()
  5033	    { return __size_or_zero_v<_Tp, _Abi>; }
  5034	
  5035	    _GLIBCXX_SIMD_CONSTEXPR simd() = default;
  5036	    _GLIBCXX_SIMD_CONSTEXPR simd(const simd&) = default;
  5037	    _GLIBCXX_SIMD_CONSTEXPR simd(simd&&) noexcept = default;
  5038	    _GLIBCXX_SIMD_CONSTEXPR simd& operator=(const simd&) = default;
  5039	    _GLIBCXX_SIMD_CONSTEXPR simd& operator=(simd&&) noexcept = default;
  5040	
  5041	    // implicit broadcast constructor
      	    // Removed from overload resolution when _Up (after remove_cvref) is bool.
      	    // The argument is cast to value_type and passed to _Impl::_S_broadcast.
      	    // NOTE(review): _ValuePreservingOrInt presumably restricts _Up further;
      	    // its definition is outside this part of the file.
  5042	    template <typename _Up,
  5043		      typename = enable_if_t<!is_same_v<__remove_cvref_t<_Up>, bool>>>
  5044	      _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR
  5045	      simd(_ValuePreservingOrInt<_Up, value_type>&& __x)
  5046	      : _M_data(
  5047		_Impl::_S_broadcast(static_cast<value_type>(static_cast<_Up&&>(__x))))
  5048	      {}
  5049	
  5050	    // implicit type conversion constructor (convert from fixed_size to
  5051	    // fixed_size)
      	    // Enabled only when abi_type is simd_abi::fixed_size<size()>, the
      	    // conversion _Up -> value_type is not narrowing, and
      	    // __converts_to_higher_integer_rank<_Up, value_type> holds. The source is
      	    // converted to array<_Up, size()> and then loaded through the load
      	    // constructor with the vector_aligned flag.
  5052	    template <typename _Up>
  5053	      _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR
  5054	      simd(const simd<_Up, simd_abi::fixed_size<size()>>& __x,
  5055		   enable_if_t<
  5056		     conjunction<
  5057		       is_same<simd_abi::fixed_size<size()>, abi_type>,
  5058		       negation<__is_narrowing_conversion<_Up, value_type>>,
  5059		       __converts_to_higher_integer_rank<_Up, value_type>>::value,
  5060		     void*> = nullptr)
  5061	      : simd{static_cast<array<_Up, size()>>(__x).data(), vector_aligned} {}
  5062	
  5063	      // explicit type conversion constructor
      	      // Only compiled when _GLIBCXX_SIMD_ENABLE_STATIC_CAST is defined, and
      	      // only viable when static_simd_cast<simd>(const simd<_Up, _A2>&) is a
      	      // well-formed expression; the conversion itself is that cast.
  5064	#ifdef _GLIBCXX_SIMD_ENABLE_STATIC_CAST
  5065	    template <typename _Up, typename _A2,
  5066		      typename = decltype(static_simd_cast<simd>(
  5067			declval<const simd<_Up, _A2>&>()))>
  5068	      _GLIBCXX_SIMD_ALWAYS_INLINE explicit _GLIBCXX_SIMD_CONSTEXPR
  5069	      simd(const simd<_Up, _A2>& __x)
  5070	      : simd(static_simd_cast<simd>(__x)) {}
  5071	#endif // _GLIBCXX_SIMD_ENABLE_STATIC_CAST
  5072	
  5073	    // generator constructor
      	    // Forwards the callable __gen to _Impl::_S_generator. The unnamed second
      	    // parameter exists only for SFINAE: its type is formed from the result
      	    // type of calling __gen with a _SizeConstant<0>&.
  5074	    template <typename _Fp>
  5075	      _GLIBCXX_SIMD_ALWAYS_INLINE explicit _GLIBCXX_SIMD_CONSTEXPR
  5076	      simd(_Fp&& __gen, _ValuePreservingOrInt<decltype(declval<_Fp>()(
  5077							declval<_SizeConstant<0>&>())),
  5078						      value_type>* = nullptr)
  5079	      : _M_data(_Impl::_S_generator(static_cast<_Fp&&>(__gen), _S_type_tag)) {}
  5080	
  5081	    // load constructor
      	    // Initializes _M_data from _Impl::_S_load; __mem is first passed through
      	    // _Flags::_S_apply<simd>.
  5082	    template <typename _Up, typename _Flags>
  5083	      _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR
  5084	      simd(const _Up* __mem, _IsSimdFlagType<_Flags>)
  5085	      : _M_data(
  5086		  _Impl::_S_load(_Flags::template _S_apply<simd>(__mem), _S_type_tag))
  5087	      {}
  5088	
  5089	    // loads [simd.load]
      	    // Same load path as the load constructor, but assigns to an existing
      	    // object; the loaded value is cast to the type of _M_data.
  5090	    template <typename _Up, typename _Flags>
  5091	      _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR void
  5092	      copy_from(const _Vectorizable<_Up>* __mem, _IsSimdFlagType<_Flags>)
  5093	      {
  5094		_M_data = static_cast<decltype(_M_data)>(
  5095		  _Impl::_S_load(_Flags::template _S_apply<simd>(__mem), _S_type_tag));
  5096	      }
  5097	
  5098	    // stores [simd.store]
      	    // Passes _M_data and the flag-adjusted __mem to _Impl::_S_store.
  5099	    template <typename _Up, typename _Flags>
  5100	      _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR void
  5101	      copy_to(_Vectorizable<_Up>* __mem, _IsSimdFlagType<_Flags>) const
  5102	      {
  5103		_Impl::_S_store(_M_data, _Flags::template _S_apply<simd>(__mem),
  5104				_S_type_tag);
  5105	      }
  5106	
  5107	    // scalar access
      	    // Non-const access returns a `reference` proxy built from _M_data and the
      	    // index converted to int.
  5108	    _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR reference
  5109	    operator[](size_t __i)
  5110	    { return {_M_data, int(__i)}; }
  5111	
      	    // Const access returns the element by value. For the scalar ABI _M_data
      	    // is returned directly (with a debug assertion that __i == 0), which is
      	    // why __i is marked [[maybe_unused]]; otherwise _M_data is indexed.
  5112	    _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR value_type
  5113	    operator[]([[maybe_unused]] size_t __i) const
  5114	    {
  5115	      if constexpr (__is_scalar_abi<_Abi>())
  5116		{
  5117		  _GLIBCXX_DEBUG_ASSERT(__i == 0);
  5118		  return _M_data;
  5119		}
  5120	      else
  5121		return _M_data[__i];
  5122	    }
  5123	
  5124	    // increment and decrement:
      	    // The prefix forms modify _M_data in place via _Impl and return *this;
      	    // the postfix forms copy *this first and return that copy.
  5125	    _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR simd&
  5126	    operator++()
  5127	    {
  5128	      _Impl::_S_increment(_M_data);
  5129	      return *this;
  5130	    }
  5131	
  5132	    _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR simd
  5133	    operator++(int)
  5134	    {
  5135	      simd __r = *this;
  5136	      _Impl::_S_increment(_M_data);
  5137	      return __r;
  5138	    }
  5139	
  5140	    _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR simd&
  5141	    operator--()
  5142	    {
  5143	      _Impl::_S_decrement(_M_data);
  5144	      return *this;
  5145	    }
  5146	
  5147	    _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR simd
  5148	    operator--(int)
  5149	    {
  5150	      simd __r = *this;
  5151	      _Impl::_S_decrement(_M_data);
  5152	      return __r;
  5153	    }
  5154	
  5155	    // unary operators (for any _Tp)
      	    // operator! yields a mask_type (built from _Impl::_S_negate), not a simd.
  5156	    _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR mask_type
  5157	    operator!() const
  5158	    { return {__private_init, _Impl::_S_negate(_M_data)}; }
  5159	
  5160	    _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR simd
  5161	    operator+() const
  5162	    { return *this; }
  5163	
  5164	    _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR simd
  5165	    operator-() const
  5166	    { return {__private_init, _Impl::_S_unary_minus(_M_data)}; }
  5167	
  5168	    // access to internal representation (suggested extension)
      	    // Explicit construction from _Traits::_SimdCastType; __init is used to
      	    // initialize _M_data directly.
  5169	    _GLIBCXX_SIMD_ALWAYS_INLINE explicit _GLIBCXX_SIMD_CONSTEXPR
  5170	    simd(_CastType __init) : _M_data(__init) {}
  5171	
  5172	    // compound assignment [simd.cassign]
      	    // Each compound form is expressed through the matching binary operator
      	    // followed by assignment to __lhs.
  5173	    _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend simd&
  5174	    operator+=(simd& __lhs, const simd& __x)
  5175	    { return __lhs = __lhs + __x; }
  5176	
  5177	    _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend simd&
  5178	    operator-=(simd& __lhs, const simd& __x)
  5179	    { return __lhs = __lhs - __x; }
  5180	
  5181	    _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend simd&
  5182	    operator*=(simd& __lhs, const simd& __x)
  5183	    { return __lhs = __lhs * __x; }
  5184	
  5185	    _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend simd&
  5186	    operator/=(simd& __lhs, const simd& __x)
  5187	    { return __lhs = __lhs / __x; }
  5188	
  5189	    // binary operators [simd.binary]
      	    // Each operator passes both operands' _M_data to the corresponding _Impl
      	    // function and constructs the result with the __private_init tag.
  5190	    _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend simd
  5191	    operator+(const simd& __x, const simd& __y)
  5192	    { return {__private_init, _Impl::_S_plus(__x._M_data, __y._M_data)}; }
  5193	
  5194	    _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend simd
  5195	    operator-(const simd& __x, const simd& __y)
  5196	    { return {__private_init, _Impl::_S_minus(__x._M_data, __y._M_data)}; }
  5197	
  5198	    _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend simd
  5199	    operator*(const simd& __x, const simd& __y)
  5200	    { return {__private_init, _Impl::_S_multiplies(__x._M_data, __y._M_data)}; }
  5201	
  5202	    _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend simd
  5203	    operator/(const simd& __x, const simd& __y)
  5204	    { return {__private_init, _Impl::_S_divides(__x._M_data, __y._M_data)}; }
  5205	
  5206	    // compares [simd.comparison]
      	    // All comparisons return a mask_type built by _S_make_mask. operator> and
      	    // operator>= reuse _S_less and _S_less_equal with the operands swapped.
  5207	    _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend mask_type
  5208	    operator==(const simd& __x, const simd& __y)
  5209	    { return simd::_S_make_mask(_Impl::_S_equal_to(__x._M_data, __y._M_data)); }
  5210	
  5211	    _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend mask_type
  5212	    operator!=(const simd& __x, const simd& __y)
  5213	    {
  5214	      return simd::_S_make_mask(
  5215		_Impl::_S_not_equal_to(__x._M_data, __y._M_data));
  5216	    }
  5217	
  5218	    _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend mask_type
  5219	    operator<(const simd& __x, const simd& __y)
  5220	    { return simd::_S_make_mask(_Impl::_S_less(__x._M_data, __y._M_data)); }
  5221	
  5222	    _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend mask_type
  5223	    operator<=(const simd& __x, const simd& __y)
  5224	    {
  5225	      return simd::_S_make_mask(_Impl::_S_less_equal(__x._M_data, __y._M_data));
  5226	    }
  5227	
  5228	    _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend mask_type
  5229	    operator>(const simd& __x, const simd& __y)
  5230	    { return simd::_S_make_mask(_Impl::_S_less(__y._M_data, __x._M_data)); }
  5231	
  5232	    _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend mask_type
  5233	    operator>=(const simd& __x, const simd& __y)
  5234	    {
  5235	      return simd::_S_make_mask(_Impl::_S_less_equal(__y._M_data, __x._M_data));
  5236	    }
  5237	
  5238	    // operator?: overloads (suggested extension) {{{
      	    // Only compiled when __GXX_CONDITIONAL_IS_OVERLOADABLE__ is defined. The
      	    // result starts as a copy of __where_false; _Impl::_S_masked_assign then
      	    // receives the mask data, the result data and the data of __where_true.
  5239	#ifdef __GXX_CONDITIONAL_IS_OVERLOADABLE__
  5240	    _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend simd
  5241	    operator?:(const mask_type& __k, const simd& __where_true,
  5242		const simd& __where_false)
  5243	    {
  5244	      auto __ret = __where_false;
  5245	      _Impl::_S_masked_assign(__data(__k), __data(__ret), __data(__where_true));
  5246	      return __ret;
  5247	    }
  5248	
  5249	#endif // __GXX_CONDITIONAL_IS_OVERLOADABLE__
  5250	    // }}}
  5251	
  5252	    // "private" because of the first argument's namespace
      	    // Initializes _M_data directly from an existing _MemberType value.
  5253	    _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR
  5254	    simd(_PrivateInit, const _MemberType& __init)
  5255	    : _M_data(__init) {}
  5256	
  5257	    // "private" because of the first argument's namespace
      	    // Value-initializes _M_data, then assigns ~*this to the elements selected
      	    // by the mask constructed from the bitset __init.
  5258	    _GLIBCXX_SIMD_INTRINSIC
  5259	    simd(_BitsetInit, bitset<size()> __init) : _M_data()
  5260	    { where(mask_type(__bitset_init, __init), *this) = ~*this; }
  5261	
      	    // For the scalar ABI this is __builtin_constant_p(_M_data); for every
      	    // other ABI the query is delegated to _M_data._M_is_constprop().
  5262	    _GLIBCXX_SIMD_INTRINSIC constexpr bool
  5263	    _M_is_constprop() const
  5264	    {
  5265	      if constexpr (__is_scalar_abi<_Abi>())
  5266		return __builtin_constant_p(_M_data);
  5267	      else
  5268		return _M_data._M_is_constprop();
  5269	    }
  5270	
  5271	  private:
      	    // Wraps a raw mask member in mask_type using the __private_init tag.
  5272	    _GLIBCXX_SIMD_INTRINSIC static constexpr mask_type
  5273	    _S_make_mask(typename mask_type::_MemberType __k)
  5274	    { return {__private_init, __k}; }
  5275	
      	    // The free __data() accessors are friends so they can return _M_data.
  5276	    friend const auto& __data<value_type, abi_type>(const simd&);
  5277	    friend auto& __data<value_type, abi_type>(simd&);
      	    // The only data member; its alignment comes from _Traits::_S_simd_align.
  5278	    alignas(_Traits::_S_simd_align) _MemberType _M_data;
  5279	  };
  5280	
  5281	// }}}
  5282	/// @cond undocumented
  5283	// __data {{{
  5284	template <typename _Tp, typename _Ap>
      	  // Read-only accessor for the private _M_data member of a simd object.
      	  // simd declares this specialization a friend, which is what permits the
      	  // member access below. Returns a const reference, not a copy.
  5285	  _GLIBCXX_SIMD_INTRINSIC constexpr const auto&
  5286	  __data(const simd<_Tp, _Ap>& __x)
  5287	  { return __x._M_data; }
  5288	
  5289	template <typename _Tp, typename _Ap>
      	  // Mutable accessor for the private _M_data member of a simd object.
      	  // simd declares this specialization a friend, which is what permits the
      	  // member access below. Returns a non-const reference, so callers can
      	  // modify the stored data in place.
  5290	  _GLIBCXX_SIMD_INTRINSIC constexpr auto&
  5291	  __data(simd<_Tp, _Ap>& __x)
  5292	  { return __x._M_data; }
  5293	
  5294	// }}}
  5295	namespace __float_bitwise_operators { //{{{
      	// Free operator^, operator| and operator& templates for simd<_Tp, _Ap>.
      	// Each one hands the __data() of both operands to the matching _S_bit_*
      	// function of _Ap::_SimdImpl and list-initializes the result from the
      	// __private_init tag plus that value.
  5296	template <typename _Tp, typename _Ap>
  5297	  _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR simd<_Tp, _Ap>
  5298	  operator^(const simd<_Tp, _Ap>& __a, const simd<_Tp, _Ap>& __b)
  5299	  {
      	    using _Ops = typename _Ap::_SimdImpl;
      	    return {__private_init, _Ops::_S_bit_xor(__data(__a), __data(__b))};
      	  }
  5300	
  5301	template <typename _Tp, typename _Ap>
  5302	  _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR simd<_Tp, _Ap>
  5303	  operator|(const simd<_Tp, _Ap>& __a, const simd<_Tp, _Ap>& __b)
  5304	  {
      	    using _Ops = typename _Ap::_SimdImpl;
      	    return {__private_init, _Ops::_S_bit_or(__data(__a), __data(__b))};
      	  }
  5305	
  5306	template <typename _Tp, typename _Ap>
  5307	  _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR simd<_Tp, _Ap>
  5308	  operator&(const simd<_Tp, _Ap>& __a, const simd<_Tp, _Ap>& __b)
  5309	  {
      	    using _Ops = typename _Ap::_SimdImpl;
      	    return {__private_init, _Ops::_S_bit_and(__data(__a), __data(__b))};
      	  }
  5310	} // namespace __float_bitwise_operators }}}
  5311	/// @endcond
  5312	
  5313	/// @}
  5314	_GLIBCXX_SIMD_END_NAMESPACE
  5315	
  5316	#endif // __cplusplus >= 201703L
  5317	#endif // _GLIBCXX_EXPERIMENTAL_SIMD_H
  5318	
  5319	// vim: foldmethod=marker foldmarker={{{,}}}