Where Online Learning is simpler!

The C and C++ Include Header Files

/usr/include/c++/11/experimental/bits/simd_math.h


$ cat -n /usr/include/c++/11/experimental/bits/simd_math.h

     1	// Math overloads for simd -*- C++ -*-
     2	
     3	// Copyright (C) 2020-2021 Free Software Foundation, Inc.
     4	//
     5	// This file is part of the GNU ISO C++ Library.  This library is free
     6	// software; you can redistribute it and/or modify it under the
     7	// terms of the GNU General Public License as published by the
     8	// Free Software Foundation; either version 3, or (at your option)
     9	// any later version.
    10	
    11	// This library is distributed in the hope that it will be useful,
    12	// but WITHOUT ANY WARRANTY; without even the implied warranty of
    13	// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    14	// GNU General Public License for more details.
    15	
    16	// Under Section 7 of GPL version 3, you are granted additional
    17	// permissions described in the GCC Runtime Library Exception, version
    18	// 3.1, as published by the Free Software Foundation.
    19	
    20	// You should have received a copy of the GNU General Public License and
    21	// a copy of the GCC Runtime Library Exception along with this program;
    22	// see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
    23	// <http://www.gnu.org/licenses/>.
    24	
    25	#ifndef _GLIBCXX_EXPERIMENTAL_SIMD_MATH_H_
    26	#define _GLIBCXX_EXPERIMENTAL_SIMD_MATH_H_
    27	
    28	#if __cplusplus >= 201703L
    29	
    30	#include <utility>
    31	#include <iomanip>
    32	
    33	_GLIBCXX_SIMD_BEGIN_NAMESPACE
    34	template <typename _Tp, typename _V>
    35	  using _Samesize = fixed_size_simd<_Tp, _V::size()>;
    36	
    37	// _Math_return_type {{{
    38	template <typename _DoubleR, typename _Tp, typename _Abi>
    39	  struct _Math_return_type;
    40	
    41	template <typename _DoubleR, typename _Tp, typename _Abi>
    42	  using _Math_return_type_t =
    43	    typename _Math_return_type<_DoubleR, _Tp, _Abi>::type;
    44	
    45	template <typename _Tp, typename _Abi>
    46	  struct _Math_return_type<double, _Tp, _Abi>
    47	  { using type = simd<_Tp, _Abi>; };
    48	
    49	template <typename _Tp, typename _Abi>
    50	  struct _Math_return_type<bool, _Tp, _Abi>
    51	  { using type = simd_mask<_Tp, _Abi>; };
    52	
    53	template <typename _DoubleR, typename _Tp, typename _Abi>
    54	  struct _Math_return_type
    55	  { using type = fixed_size_simd<_DoubleR, simd_size_v<_Tp, _Abi>>; };
    56	
    57	//}}}
    58	// _GLIBCXX_SIMD_MATH_CALL_ {{{
    59	#define _GLIBCXX_SIMD_MATH_CALL_(__name)                                       \
    60	template <typename _Tp, typename _Abi, typename...,                            \
    61		  typename _R = _Math_return_type_t<                                   \
    62		    decltype(std::__name(declval<double>())), _Tp, _Abi>>              \
    63	  enable_if_t<is_floating_point_v<_Tp>, _R>                                    \
    64	  __name(simd<_Tp, _Abi> __x)                                                  \
    65	  { return {__private_init, _Abi::_SimdImpl::_S_##__name(__data(__x))}; }
    66	
    67	// }}}
    68	//_Extra_argument_type{{{
    69	template <typename _Up, typename _Tp, typename _Abi>
    70	  struct _Extra_argument_type;
    71	
    72	template <typename _Tp, typename _Abi>
    73	  struct _Extra_argument_type<_Tp*, _Tp, _Abi>
    74	  {
    75	    using type = simd<_Tp, _Abi>*;
    76	    static constexpr double* declval();
    77	    static constexpr bool __needs_temporary_scalar = true;
    78	
    79	    _GLIBCXX_SIMD_INTRINSIC static constexpr auto _S_data(type __x)
    80	    { return &__data(*__x); }
    81	  };
    82	
    83	template <typename _Up, typename _Tp, typename _Abi>
    84	  struct _Extra_argument_type<_Up*, _Tp, _Abi>
    85	  {
    86	    static_assert(is_integral_v<_Up>);
    87	    using type = fixed_size_simd<_Up, simd_size_v<_Tp, _Abi>>*;
    88	    static constexpr _Up* declval();
    89	    static constexpr bool __needs_temporary_scalar = true;
    90	
    91	    _GLIBCXX_SIMD_INTRINSIC static constexpr auto _S_data(type __x)
    92	    { return &__data(*__x); }
    93	  };
    94	
    95	template <typename _Tp, typename _Abi>
    96	  struct _Extra_argument_type<_Tp, _Tp, _Abi>
    97	  {
    98	    using type = simd<_Tp, _Abi>;
    99	    static constexpr double declval();
   100	    static constexpr bool __needs_temporary_scalar = false;
   101	
   102	    _GLIBCXX_SIMD_INTRINSIC static constexpr decltype(auto)
   103	    _S_data(const type& __x)
   104	    { return __data(__x); }
   105	  };
   106	
   107	template <typename _Up, typename _Tp, typename _Abi>
   108	  struct _Extra_argument_type
   109	  {
   110	    static_assert(is_integral_v<_Up>);
   111	    using type = fixed_size_simd<_Up, simd_size_v<_Tp, _Abi>>;
   112	    static constexpr _Up declval();
   113	    static constexpr bool __needs_temporary_scalar = false;
   114	
   115	    _GLIBCXX_SIMD_INTRINSIC static constexpr decltype(auto)
   116	    _S_data(const type& __x)
   117	    { return __data(__x); }
   118	  };
   119	
   120	//}}}
   121	// _GLIBCXX_SIMD_MATH_CALL2_ {{{
   122	#define _GLIBCXX_SIMD_MATH_CALL2_(__name, arg2_)                               \
   123	template <                                                                     \
   124	  typename _Tp, typename _Abi, typename...,                                    \
   125	  typename _Arg2 = _Extra_argument_type<arg2_, _Tp, _Abi>,                     \
   126	  typename _R = _Math_return_type_t<                                           \
   127	    decltype(std::__name(declval<double>(), _Arg2::declval())), _Tp, _Abi>>    \
   128	  enable_if_t<is_floating_point_v<_Tp>, _R>                                    \
   129	  __name(const simd<_Tp, _Abi>& __x, const typename _Arg2::type& __y)          \
   130	  {                                                                            \
   131	    return {__private_init,                                                    \
   132		    _Abi::_SimdImpl::_S_##__name(__data(__x), _Arg2::_S_data(__y))};   \
   133	  }                                                                            \
   134	template <typename _Up, typename _Tp, typename _Abi>                           \
   135	  _GLIBCXX_SIMD_INTRINSIC _Math_return_type_t<                                 \
   136	    decltype(std::__name(                                                      \
   137	      declval<double>(),                                                       \
   138	      declval<enable_if_t<                                                     \
   139		conjunction_v<                                                         \
   140		  is_same<arg2_, _Tp>,                                                 \
   141		  negation<is_same<__remove_cvref_t<_Up>, simd<_Tp, _Abi>>>,           \
   142		  is_convertible<_Up, simd<_Tp, _Abi>>, is_floating_point<_Tp>>,       \
   143		double>>())),                                                          \
   144	    _Tp, _Abi>                                                                 \
   145	  __name(_Up&& __xx, const simd<_Tp, _Abi>& __yy)                              \
   146	  { return __name(simd<_Tp, _Abi>(static_cast<_Up&&>(__xx)), __yy); }
   147	
   148	// }}}
   149	// _GLIBCXX_SIMD_MATH_CALL3_ {{{
   150	#define _GLIBCXX_SIMD_MATH_CALL3_(__name, arg2_, arg3_)                        \
   151	template <typename _Tp, typename _Abi, typename...,                            \
   152		  typename _Arg2 = _Extra_argument_type<arg2_, _Tp, _Abi>,             \
   153		  typename _Arg3 = _Extra_argument_type<arg3_, _Tp, _Abi>,             \
   154		  typename _R = _Math_return_type_t<                                   \
   155		    decltype(std::__name(declval<double>(), _Arg2::declval(),          \
   156					 _Arg3::declval())),                           \
   157		    _Tp, _Abi>>                                                        \
   158	  enable_if_t<is_floating_point_v<_Tp>, _R>                                    \
   159	  __name(const simd<_Tp, _Abi>& __x, const typename _Arg2::type& __y,          \
   160		 const typename _Arg3::type& __z)                                      \
   161	  {                                                                            \
   162	    return {__private_init,                                                    \
   163		    _Abi::_SimdImpl::_S_##__name(__data(__x), _Arg2::_S_data(__y),     \
   164						 _Arg3::_S_data(__z))};                \
   165	  }                                                                            \
   166	template <                                                                     \
   167	  typename _T0, typename _T1, typename _T2, typename...,                       \
   168	  typename _U0 = __remove_cvref_t<_T0>,                                        \
   169	  typename _U1 = __remove_cvref_t<_T1>,                                        \
   170	  typename _U2 = __remove_cvref_t<_T2>,                                        \
   171	  typename _Simd = conditional_t<is_simd_v<_U1>, _U1, _U2>,                    \
   172	  typename = enable_if_t<conjunction_v<                                        \
   173	    is_simd<_Simd>, is_convertible<_T0&&, _Simd>,                              \
   174	    is_convertible<_T1&&, _Simd>, is_convertible<_T2&&, _Simd>,                \
   175	    negation<conjunction<                                                      \
   176	      is_simd<_U0>, is_floating_point<__value_type_or_identity_t<_U0>>>>>>>    \
   177	  _GLIBCXX_SIMD_INTRINSIC decltype(__name(declval<const _Simd&>(),             \
   178						  declval<const _Simd&>(),             \
   179						  declval<const _Simd&>()))            \
   180	  __name(_T0&& __xx, _T1&& __yy, _T2&& __zz)                                   \
   181	  {                                                                            \
   182	    return __name(_Simd(static_cast<_T0&&>(__xx)),                             \
   183			  _Simd(static_cast<_T1&&>(__yy)),                             \
   184			  _Simd(static_cast<_T2&&>(__zz)));                            \
   185	  }
   186	
   187	// }}}
   188	// __cosSeries {{{
   189	template <typename _Abi>
   190	  _GLIBCXX_SIMD_ALWAYS_INLINE static simd<float, _Abi>
   191	  __cosSeries(const simd<float, _Abi>& __x)
   192	  {
   193	    const simd<float, _Abi> __x2 = __x * __x;
   194	    simd<float, _Abi> __y;
   195	    __y = 0x1.ap-16f;                  //  1/8!
   196	    __y = __y * __x2 - 0x1.6c1p-10f;   // -1/6!
   197	    __y = __y * __x2 + 0x1.555556p-5f; //  1/4!
   198	    return __y * (__x2 * __x2) - .5f * __x2 + 1.f;
   199	  }
   200	
   201	template <typename _Abi>
   202	  _GLIBCXX_SIMD_ALWAYS_INLINE static simd<double, _Abi>
   203	  __cosSeries(const simd<double, _Abi>& __x)
   204	  {
   205	    const simd<double, _Abi> __x2 = __x * __x;
   206	    simd<double, _Abi> __y;
   207	    __y = 0x1.AC00000000000p-45;              //  1/16!
   208	    __y = __y * __x2 - 0x1.9394000000000p-37; // -1/14!
   209	    __y = __y * __x2 + 0x1.1EED8C0000000p-29; //  1/12!
   210	    __y = __y * __x2 - 0x1.27E4FB7400000p-22; // -1/10!
   211	    __y = __y * __x2 + 0x1.A01A01A018000p-16; //  1/8!
   212	    __y = __y * __x2 - 0x1.6C16C16C16C00p-10; // -1/6!
   213	    __y = __y * __x2 + 0x1.5555555555554p-5;  //  1/4!
   214	    return (__y * __x2 - .5f) * __x2 + 1.f;
   215	  }
   216	
   217	// }}}
   218	// __sinSeries {{{
   219	template <typename _Abi>
   220	  _GLIBCXX_SIMD_ALWAYS_INLINE static simd<float, _Abi>
   221	  __sinSeries(const simd<float, _Abi>& __x)
   222	  {
   223	    const simd<float, _Abi> __x2 = __x * __x;
   224	    simd<float, _Abi> __y;
   225	    __y = -0x1.9CC000p-13f;            // -1/7!
   226	    __y = __y * __x2 + 0x1.111100p-7f; //  1/5!
   227	    __y = __y * __x2 - 0x1.555556p-3f; // -1/3!
   228	    return __y * (__x2 * __x) + __x;
   229	  }
   230	
   231	template <typename _Abi>
   232	  _GLIBCXX_SIMD_ALWAYS_INLINE static simd<double, _Abi>
   233	  __sinSeries(const simd<double, _Abi>& __x)
   234	  {
   235	    // __x  = [0, 0.7854 = pi/4]
   236	    // __x² = [0, 0.6169 = pi²/8]
   237	    const simd<double, _Abi> __x2 = __x * __x;
   238	    simd<double, _Abi> __y;
   239	    __y = -0x1.ACF0000000000p-41;             // -1/15!
   240	    __y = __y * __x2 + 0x1.6124400000000p-33; //  1/13!
   241	    __y = __y * __x2 - 0x1.AE64567000000p-26; // -1/11!
   242	    __y = __y * __x2 + 0x1.71DE3A5540000p-19; //  1/9!
   243	    __y = __y * __x2 - 0x1.A01A01A01A000p-13; // -1/7!
   244	    __y = __y * __x2 + 0x1.1111111111110p-7;  //  1/5!
   245	    __y = __y * __x2 - 0x1.5555555555555p-3;  // -1/3!
   246	    return __y * (__x2 * __x) + __x;
   247	  }
   248	
   249	// }}}
   250	// __zero_low_bits {{{
   251	template <int _Bits, typename _Tp, typename _Abi>
   252	  _GLIBCXX_SIMD_INTRINSIC simd<_Tp, _Abi>
   253	  __zero_low_bits(simd<_Tp, _Abi> __x)
   254	  {
   255	    const simd<_Tp, _Abi> __bitmask
   256	      = __bit_cast<_Tp>(~make_unsigned_t<__int_for_sizeof_t<_Tp>>() << _Bits);
   257	    return {__private_init,
   258		    _Abi::_SimdImpl::_S_bit_and(__data(__x), __data(__bitmask))};
   259	  }
   260	
   261	// }}}
   262	// __fold_input {{{
   263	
   264	/**@internal
   265	 * Fold @p x into [-¼π, ¼π] and remember the quadrant it came from:
   266	 * quadrant 0: [-¼π,  ¼π]
   267	 * quadrant 1: [ ¼π,  ¾π]
   268	 * quadrant 2: [ ¾π, 1¼π]
   269	 * quadrant 3: [1¼π, 1¾π]
   270	 *
   271	 * The algorithm determines `y` as the multiple `x - y * ¼π = [-¼π, ¼π]`. Using
   272	 * a bitmask, `y` is reduced to `quadrant`. `y` can be calculated as
   273	 * ```
   274	 * y = trunc(x / ¼π);
   275	 * y += fmod(y, 2);
   276	 * ```
   277	 * This can be simplified by moving the (implicit) division by 2 into the
   278	 * truncation expression. The `+= fmod` effect can the be achieved by using
   279	 * rounding instead of truncation: `y = round(x / ½π) * 2`. If precision allows,
   280	 * `2/π * x` is better (faster).
   281	 */
   282	template <typename _Tp, typename _Abi>
   283	  struct _Folded
   284	  {
   285	    simd<_Tp, _Abi> _M_x;
   286	    rebind_simd_t<int, simd<_Tp, _Abi>> _M_quadrant;
   287	  };
   288	
   289	namespace __math_float {
   290	inline constexpr float __pi_over_4 = 0x1.921FB6p-1f; // π/4
   291	inline constexpr float __2_over_pi = 0x1.45F306p-1f; // 2/π
   292	inline constexpr float __pi_2_5bits0
   293	  = 0x1.921fc0p0f; // π/2, 5 0-bits (least significant)
   294	inline constexpr float __pi_2_5bits0_rem
   295	  = -0x1.5777a6p-21f; // π/2 - __pi_2_5bits0
   296	} // namespace __math_float
   297	namespace __math_double {
   298	inline constexpr double __pi_over_4 = 0x1.921fb54442d18p-1; // π/4
   299	inline constexpr double __2_over_pi = 0x1.45F306DC9C883p-1; // 2/π
   300	inline constexpr double __pi_2 = 0x1.921fb54442d18p0;       // π/2
   301	} // namespace __math_double
   302	
   303	template <typename _Abi>
   304	  _GLIBCXX_SIMD_ALWAYS_INLINE _Folded<float, _Abi>
   305	  __fold_input(const simd<float, _Abi>& __x)
   306	  {
   307	    using _V = simd<float, _Abi>;
   308	    using _IV = rebind_simd_t<int, _V>;
   309	    using namespace __math_float;
   310	    _Folded<float, _Abi> __r;
   311	    __r._M_x = abs(__x);
   312	#if 0
   313	    // zero most mantissa bits:
   314	    constexpr float __1_over_pi = 0x1.45F306p-2f; // 1/π
   315	    const auto __y = (__r._M_x * __1_over_pi + 0x1.8p23f) - 0x1.8p23f;
   316	    // split π into 4 parts, the first three with 13 trailing zeros (to make the
   317	    // following multiplications precise):
   318	    constexpr float __pi0 = 0x1.920000p1f;
   319	    constexpr float __pi1 = 0x1.fb4000p-11f;
   320	    constexpr float __pi2 = 0x1.444000p-23f;
   321	    constexpr float __pi3 = 0x1.68c234p-38f;
   322	    __r._M_x - __y*__pi0 - __y*__pi1 - __y*__pi2 - __y*__pi3
   323	#else
   324	    if (_GLIBCXX_SIMD_IS_UNLIKELY(all_of(__r._M_x < __pi_over_4)))
   325	      __r._M_quadrant = 0;
   326	    else if (_GLIBCXX_SIMD_IS_LIKELY(all_of(__r._M_x < 6 * __pi_over_4)))
   327	      {
   328		const _V __y = nearbyint(__r._M_x * __2_over_pi);
   329		__r._M_quadrant = static_simd_cast<_IV>(__y) & 3; // __y mod 4
   330		__r._M_x -= __y * __pi_2_5bits0;
   331		__r._M_x -= __y * __pi_2_5bits0_rem;
   332	      }
   333	    else
   334	      {
   335		using __math_double::__2_over_pi;
   336		using __math_double::__pi_2;
   337		using _VD = rebind_simd_t<double, _V>;
   338		_VD __xd = static_simd_cast<_VD>(__r._M_x);
   339		_VD __y = nearbyint(__xd * __2_over_pi);
   340		__r._M_quadrant = static_simd_cast<_IV>(__y) & 3; // = __y mod 4
   341		__r._M_x = static_simd_cast<_V>(__xd - __y * __pi_2);
   342	      }
   343	#endif
   344	    return __r;
   345	  }
   346	
   347	template <typename _Abi>
   348	  _GLIBCXX_SIMD_ALWAYS_INLINE _Folded<double, _Abi>
   349	  __fold_input(const simd<double, _Abi>& __x)
   350	  {
   351	    using _V = simd<double, _Abi>;
   352	    using _IV = rebind_simd_t<int, _V>;
   353	    using namespace __math_double;
   354	
   355	    _Folded<double, _Abi> __r;
   356	    __r._M_x = abs(__x);
   357	    if (_GLIBCXX_SIMD_IS_UNLIKELY(all_of(__r._M_x < __pi_over_4)))
   358	      {
   359		__r._M_quadrant = 0;
   360		return __r;
   361	      }
   362	    const _V __y = nearbyint(__r._M_x / (2 * __pi_over_4));
   363	    __r._M_quadrant = static_simd_cast<_IV>(__y) & 3;
   364	
   365	    if (_GLIBCXX_SIMD_IS_LIKELY(all_of(__r._M_x < 1025 * __pi_over_4)))
   366	      {
   367		// x - y * pi/2, y uses no more than 11 mantissa bits
   368		__r._M_x -= __y * 0x1.921FB54443000p0;
   369		__r._M_x -= __y * -0x1.73DCB3B39A000p-43;
   370		__r._M_x -= __y * 0x1.45C06E0E68948p-86;
   371	      }
   372	    else if (_GLIBCXX_SIMD_IS_LIKELY(all_of(__y <= 0x1.0p30)))
   373	      {
   374		// x - y * pi/2, y uses no more than 29 mantissa bits
   375		__r._M_x -= __y * 0x1.921FB40000000p0;
   376		__r._M_x -= __y * 0x1.4442D00000000p-24;
   377		__r._M_x -= __y * 0x1.8469898CC5170p-48;
   378	      }
   379	    else
   380	      {
   381		// x - y * pi/2, y may require all mantissa bits
   382		const _V __y_hi = __zero_low_bits<26>(__y);
   383		const _V __y_lo = __y - __y_hi;
   384		const auto __pi_2_1 = 0x1.921FB50000000p0;
   385		const auto __pi_2_2 = 0x1.110B460000000p-26;
   386		const auto __pi_2_3 = 0x1.1A62630000000p-54;
   387		const auto __pi_2_4 = 0x1.8A2E03707344Ap-81;
   388		__r._M_x = __r._M_x - __y_hi * __pi_2_1
   389			   - max(__y_hi * __pi_2_2, __y_lo * __pi_2_1)
   390			   - min(__y_hi * __pi_2_2, __y_lo * __pi_2_1)
   391			   - max(__y_hi * __pi_2_3, __y_lo * __pi_2_2)
   392			   - min(__y_hi * __pi_2_3, __y_lo * __pi_2_2)
   393			   - max(__y * __pi_2_4, __y_lo * __pi_2_3)
   394			   - min(__y * __pi_2_4, __y_lo * __pi_2_3);
   395	      }
   396	    return __r;
   397	  }
   398	
   399	// }}}
   400	// __extract_exponent_as_int {{{
   401	template <typename _Tp, typename _Abi>
   402	  rebind_simd_t<int, simd<_Tp, _Abi>>
   403	  __extract_exponent_as_int(const simd<_Tp, _Abi>& __v)
   404	  {
   405	    using _Vp = simd<_Tp, _Abi>;
   406	    using _Up = make_unsigned_t<__int_for_sizeof_t<_Tp>>;
   407	    using namespace std::experimental::__float_bitwise_operators;
   408	    const _Vp __exponent_mask
   409	      = __infinity_v<_Tp>; // 0x7f800000 or 0x7ff0000000000000
   410	    return static_simd_cast<rebind_simd_t<int, _Vp>>(
   411	      __bit_cast<rebind_simd_t<_Up, _Vp>>(__v & __exponent_mask)
   412	      >> (__digits_v<_Tp> - 1));
   413	  }
   414	
   415	// }}}
   416	// __impl_or_fallback {{{
   417	template <typename ImplFun, typename FallbackFun, typename... _Args>
   418	  _GLIBCXX_SIMD_INTRINSIC auto
   419	  __impl_or_fallback_dispatch(int, ImplFun&& __impl_fun, FallbackFun&&,
   420				      _Args&&... __args)
   421	    -> decltype(__impl_fun(static_cast<_Args&&>(__args)...))
   422	  { return __impl_fun(static_cast<_Args&&>(__args)...); }
   423	
   424	template <typename ImplFun, typename FallbackFun, typename... _Args>
   425	  inline auto
   426	  __impl_or_fallback_dispatch(float, ImplFun&&, FallbackFun&& __fallback_fun,
   427				      _Args&&... __args)
   428	    -> decltype(__fallback_fun(static_cast<_Args&&>(__args)...))
   429	  { return __fallback_fun(static_cast<_Args&&>(__args)...); }
   430	
   431	template <typename... _Args>
   432	  _GLIBCXX_SIMD_INTRINSIC auto
   433	  __impl_or_fallback(_Args&&... __args)
   434	  {
   435	    return __impl_or_fallback_dispatch(int(), static_cast<_Args&&>(__args)...);
   436	  }
   437	//}}}
   438	
   439	// trigonometric functions {{{
   440	_GLIBCXX_SIMD_MATH_CALL_(acos)
   441	_GLIBCXX_SIMD_MATH_CALL_(asin)
   442	_GLIBCXX_SIMD_MATH_CALL_(atan)
   443	_GLIBCXX_SIMD_MATH_CALL2_(atan2, _Tp)
   444	
   445	/*
   446	 * algorithm for sine and cosine:
   447	 *
   448	 * The result can be calculated with sine or cosine depending on the π/4 section
   449	 * the input is in. sine   ≈ __x + __x³ cosine ≈ 1 - __x²
   450	 *
   451	 * sine:
   452	 * Map -__x to __x and invert the output
   453	 * Extend precision of __x - n * π/4 by calculating
   454	 * ((__x - n * p1) - n * p2) - n * p3 (p1 + p2 + p3 = π/4)
   455	 *
   456	 * Calculate Taylor series with tuned coefficients.
   457	 * Fix sign.
   458	 */
   459	// cos{{{
   460	template <typename _Tp, typename _Abi>
   461	  enable_if_t<is_floating_point_v<_Tp>, simd<_Tp, _Abi>>
   462	  cos(const simd<_Tp, _Abi>& __x)
   463	  {
   464	    using _V = simd<_Tp, _Abi>;
   465	    if constexpr (__is_scalar_abi<_Abi>() || __is_fixed_size_abi_v<_Abi>)
   466	      return {__private_init, _Abi::_SimdImpl::_S_cos(__data(__x))};
   467	    else
   468	      {
   469		if constexpr (is_same_v<_Tp, float>)
   470		  if (_GLIBCXX_SIMD_IS_UNLIKELY(any_of(abs(__x) >= 393382)))
   471		    return static_simd_cast<_V>(
   472		      cos(static_simd_cast<rebind_simd_t<double, _V>>(__x)));
   473	
   474		const auto __f = __fold_input(__x);
   475		// quadrant | effect
   476		//        0 | cosSeries, +
   477		//        1 | sinSeries, -
   478		//        2 | cosSeries, -
   479		//        3 | sinSeries, +
   480		using namespace std::experimental::__float_bitwise_operators;
   481		const _V __sign_flip
   482		  = _V(-0.f) & static_simd_cast<_V>((1 + __f._M_quadrant) << 30);
   483	
   484		const auto __need_cos = (__f._M_quadrant & 1) == 0;
   485		if (_GLIBCXX_SIMD_IS_UNLIKELY(all_of(__need_cos)))
   486		  return __sign_flip ^ __cosSeries(__f._M_x);
   487		else if (_GLIBCXX_SIMD_IS_UNLIKELY(none_of(__need_cos)))
   488		  return __sign_flip ^ __sinSeries(__f._M_x);
   489		else // some_of(__need_cos)
   490		  {
   491		    _V __r = __sinSeries(__f._M_x);
   492		    where(__need_cos.__cvt(), __r) = __cosSeries(__f._M_x);
   493		    return __r ^ __sign_flip;
   494		  }
   495	      }
   496	  }
   497	
   498	template <typename _Tp>
   499	  _GLIBCXX_SIMD_ALWAYS_INLINE
   500	  enable_if_t<is_floating_point<_Tp>::value, simd<_Tp, simd_abi::scalar>>
   501	  cos(simd<_Tp, simd_abi::scalar> __x)
   502	  { return std::cos(__data(__x)); }
   503	
   504	//}}}
   505	// sin{{{
   506	template <typename _Tp, typename _Abi>
   507	  enable_if_t<is_floating_point_v<_Tp>, simd<_Tp, _Abi>>
   508	  sin(const simd<_Tp, _Abi>& __x)
   509	  {
   510	    using _V = simd<_Tp, _Abi>;
   511	    if constexpr (__is_scalar_abi<_Abi>() || __is_fixed_size_abi_v<_Abi>)
   512	      return {__private_init, _Abi::_SimdImpl::_S_sin(__data(__x))};
   513	    else
   514	      {
   515		if constexpr (is_same_v<_Tp, float>)
   516		  if (_GLIBCXX_SIMD_IS_UNLIKELY(any_of(abs(__x) >= 527449)))
   517		    return static_simd_cast<_V>(
   518		      sin(static_simd_cast<rebind_simd_t<double, _V>>(__x)));
   519	
   520		const auto __f = __fold_input(__x);
   521		// quadrant | effect
   522		//        0 | sinSeries
   523		//        1 | cosSeries
   524		//        2 | sinSeries, sign flip
   525		//        3 | cosSeries, sign flip
   526		using namespace std::experimental::__float_bitwise_operators;
   527		const auto __sign_flip
   528		  = (__x ^ static_simd_cast<_V>(1 - __f._M_quadrant)) & _V(_Tp(-0.));
   529	
   530		const auto __need_sin = (__f._M_quadrant & 1) == 0;
   531		if (_GLIBCXX_SIMD_IS_UNLIKELY(all_of(__need_sin)))
   532		  return __sign_flip ^ __sinSeries(__f._M_x);
   533		else if (_GLIBCXX_SIMD_IS_UNLIKELY(none_of(__need_sin)))
   534		  return __sign_flip ^ __cosSeries(__f._M_x);
   535		else // some_of(__need_sin)
   536		  {
   537		    _V __r = __cosSeries(__f._M_x);
   538		    where(__need_sin.__cvt(), __r) = __sinSeries(__f._M_x);
   539		    return __sign_flip ^ __r;
   540		  }
   541	      }
   542	  }
   543	
   544	template <typename _Tp>
   545	  _GLIBCXX_SIMD_ALWAYS_INLINE
   546	  enable_if_t<is_floating_point<_Tp>::value, simd<_Tp, simd_abi::scalar>>
   547	  sin(simd<_Tp, simd_abi::scalar> __x)
   548	  { return std::sin(__data(__x)); }
   549	
   550	//}}}
   551	_GLIBCXX_SIMD_MATH_CALL_(tan)
   552	_GLIBCXX_SIMD_MATH_CALL_(acosh)
   553	_GLIBCXX_SIMD_MATH_CALL_(asinh)
   554	_GLIBCXX_SIMD_MATH_CALL_(atanh)
   555	_GLIBCXX_SIMD_MATH_CALL_(cosh)
   556	_GLIBCXX_SIMD_MATH_CALL_(sinh)
   557	_GLIBCXX_SIMD_MATH_CALL_(tanh)
   558	// }}}
   559	// exponential functions {{{
   560	_GLIBCXX_SIMD_MATH_CALL_(exp)
   561	_GLIBCXX_SIMD_MATH_CALL_(exp2)
   562	_GLIBCXX_SIMD_MATH_CALL_(expm1)
   563	
   564	// }}}
   565	// frexp {{{
   566	#if _GLIBCXX_SIMD_X86INTRIN
   567	template <typename _Tp, size_t _Np>
   568	  _SimdWrapper<_Tp, _Np>
   569	  __getexp(_SimdWrapper<_Tp, _Np> __x)
   570	  {
   571	    if constexpr (__have_avx512vl && __is_sse_ps<_Tp, _Np>())
   572	      return __auto_bitcast(_mm_getexp_ps(__to_intrin(__x)));
   573	    else if constexpr (__have_avx512f && __is_sse_ps<_Tp, _Np>())
   574	      return __auto_bitcast(_mm512_getexp_ps(__auto_bitcast(__to_intrin(__x))));
   575	    else if constexpr (__have_avx512vl && __is_sse_pd<_Tp, _Np>())
   576	      return _mm_getexp_pd(__x);
   577	    else if constexpr (__have_avx512f && __is_sse_pd<_Tp, _Np>())
   578	      return __lo128(_mm512_getexp_pd(__auto_bitcast(__x)));
   579	    else if constexpr (__have_avx512vl && __is_avx_ps<_Tp, _Np>())
   580	      return _mm256_getexp_ps(__x);
   581	    else if constexpr (__have_avx512f && __is_avx_ps<_Tp, _Np>())
   582	      return __lo256(_mm512_getexp_ps(__auto_bitcast(__x)));
   583	    else if constexpr (__have_avx512vl && __is_avx_pd<_Tp, _Np>())
   584	      return _mm256_getexp_pd(__x);
   585	    else if constexpr (__have_avx512f && __is_avx_pd<_Tp, _Np>())
   586	      return __lo256(_mm512_getexp_pd(__auto_bitcast(__x)));
   587	    else if constexpr (__is_avx512_ps<_Tp, _Np>())
   588	      return _mm512_getexp_ps(__x);
   589	    else if constexpr (__is_avx512_pd<_Tp, _Np>())
   590	      return _mm512_getexp_pd(__x);
   591	    else
   592	      __assert_unreachable<_Tp>();
   593	  }
   594	
   595	template <typename _Tp, size_t _Np>
   596	  _SimdWrapper<_Tp, _Np>
   597	  __getmant_avx512(_SimdWrapper<_Tp, _Np> __x)
   598	  {
   599	    if constexpr (__have_avx512vl && __is_sse_ps<_Tp, _Np>())
   600	      return __auto_bitcast(_mm_getmant_ps(__to_intrin(__x), _MM_MANT_NORM_p5_1,
   601						   _MM_MANT_SIGN_src));
   602	    else if constexpr (__have_avx512f && __is_sse_ps<_Tp, _Np>())
   603	      return __auto_bitcast(_mm512_getmant_ps(__auto_bitcast(__to_intrin(__x)),
   604						      _MM_MANT_NORM_p5_1,
   605						      _MM_MANT_SIGN_src));
   606	    else if constexpr (__have_avx512vl && __is_sse_pd<_Tp, _Np>())
   607	      return _mm_getmant_pd(__x, _MM_MANT_NORM_p5_1, _MM_MANT_SIGN_src);
   608	    else if constexpr (__have_avx512f && __is_sse_pd<_Tp, _Np>())
   609	      return __lo128(_mm512_getmant_pd(__auto_bitcast(__x), _MM_MANT_NORM_p5_1,
   610					       _MM_MANT_SIGN_src));
   611	    else if constexpr (__have_avx512vl && __is_avx_ps<_Tp, _Np>())
   612	      return _mm256_getmant_ps(__x, _MM_MANT_NORM_p5_1, _MM_MANT_SIGN_src);
   613	    else if constexpr (__have_avx512f && __is_avx_ps<_Tp, _Np>())
   614	      return __lo256(_mm512_getmant_ps(__auto_bitcast(__x), _MM_MANT_NORM_p5_1,
   615					       _MM_MANT_SIGN_src));
   616	    else if constexpr (__have_avx512vl && __is_avx_pd<_Tp, _Np>())
   617	      return _mm256_getmant_pd(__x, _MM_MANT_NORM_p5_1, _MM_MANT_SIGN_src);
   618	    else if constexpr (__have_avx512f && __is_avx_pd<_Tp, _Np>())
   619	      return __lo256(_mm512_getmant_pd(__auto_bitcast(__x), _MM_MANT_NORM_p5_1,
   620					       _MM_MANT_SIGN_src));
   621	    else if constexpr (__is_avx512_ps<_Tp, _Np>())
   622	      return _mm512_getmant_ps(__x, _MM_MANT_NORM_p5_1, _MM_MANT_SIGN_src);
   623	    else if constexpr (__is_avx512_pd<_Tp, _Np>())
   624	      return _mm512_getmant_pd(__x, _MM_MANT_NORM_p5_1, _MM_MANT_SIGN_src);
   625	    else
   626	      __assert_unreachable<_Tp>();
   627	  }
   628	#endif // _GLIBCXX_SIMD_X86INTRIN
   629	
   630	/**
   631	 * splits @p __v into exponent and mantissa, the sign is kept with the mantissa
   632	 *
   633	 * The return value will be in the range [0.5, 1.0[
   634	 * The @p __e value will be an integer defining the power-of-two exponent
   635	 */
   636	template <typename _Tp, typename _Abi>
   637	  enable_if_t<is_floating_point_v<_Tp>, simd<_Tp, _Abi>>
   638	  frexp(const simd<_Tp, _Abi>& __x, _Samesize<int, simd<_Tp, _Abi>>* __exp)
   639	  {
   640	    if constexpr (simd_size_v<_Tp, _Abi> == 1)
   641	      {
   642		int __tmp;
   643		const auto __r = std::frexp(__x[0], &__tmp);
   644		(*__exp)[0] = __tmp;
   645		return __r;
   646	      }
   647	    else if constexpr (__is_fixed_size_abi_v<_Abi>)
   648	      {
   649		return {__private_init,
   650			_Abi::_SimdImpl::_S_frexp(__data(__x), __data(*__exp))};
   651	#if _GLIBCXX_SIMD_X86INTRIN
   652	      }
   653	    else if constexpr (__have_avx512f)
   654	      {
   655		constexpr size_t _Np = simd_size_v<_Tp, _Abi>;
   656		constexpr size_t _NI = _Np < 4 ? 4 : _Np;
   657		const auto __v = __data(__x);
   658		const auto __isnonzero
   659		  = _Abi::_SimdImpl::_S_isnonzerovalue_mask(__v._M_data);
   660		const _SimdWrapper<int, _NI> __exp_plus1
   661		  = 1 + __convert<_SimdWrapper<int, _NI>>(__getexp(__v))._M_data;
   662		const _SimdWrapper<int, _Np> __e = __wrapper_bitcast<int, _Np>(
   663		  _Abi::_CommonImpl::_S_blend(_SimdWrapper<bool, _NI>(__isnonzero),
   664					      _SimdWrapper<int, _NI>(), __exp_plus1));
   665		simd_abi::deduce_t<int, _Np>::_CommonImpl::_S_store(__e, __exp);
   666		return {__private_init,
   667			_Abi::_CommonImpl::_S_blend(_SimdWrapper<bool, _Np>(
   668						      __isnonzero),
   669						    __v, __getmant_avx512(__v))};
   670	#endif // _GLIBCXX_SIMD_X86INTRIN
   671	      }
   672	    else
   673	      {
   674		// fallback implementation
   675		static_assert(sizeof(_Tp) == 4 || sizeof(_Tp) == 8);
   676		using _V = simd<_Tp, _Abi>;
   677		using _IV = rebind_simd_t<int, _V>;
   678		using namespace std::experimental::__proposed;
   679		using namespace std::experimental::__float_bitwise_operators;
   680	
   681		constexpr int __exp_adjust = sizeof(_Tp) == 4 ? 0x7e : 0x3fe;
   682		constexpr int __exp_offset = sizeof(_Tp) == 4 ? 0x70 : 0x200;
   683		constexpr _Tp __subnorm_scale = sizeof(_Tp) == 4 ? 0x1p112 : 0x1p512;
   684		_GLIBCXX_SIMD_USE_CONSTEXPR_API _V __exponent_mask
   685		  = __infinity_v<_Tp>; // 0x7f800000 or 0x7ff0000000000000
   686		_GLIBCXX_SIMD_USE_CONSTEXPR_API _V __p5_1_exponent
   687		  = -(2 - __epsilon_v<_Tp>) / 2; // 0xbf7fffff or 0xbfefffffffffffff
   688	
   689		_V __mant = __p5_1_exponent & (__exponent_mask | __x); // +/-[.5, 1)
   690		const _IV __exponent_bits = __extract_exponent_as_int(__x);
   691		if (_GLIBCXX_SIMD_IS_LIKELY(all_of(isnormal(__x))))
   692		  {
   693		    *__exp
   694		      = simd_cast<_Samesize<int, _V>>(__exponent_bits - __exp_adjust);
   695		    return __mant;
   696		  }
   697	
   698	#if __FINITE_MATH_ONLY__
   699		// at least one element of __x is 0 or subnormal, the rest is normal
   700		// (inf and NaN are excluded by -ffinite-math-only)
   701		const auto __iszero_inf_nan = __x == 0;
   702	#else
   703		const auto __as_int
   704		  = __bit_cast<rebind_simd_t<__int_for_sizeof_t<_Tp>, _V>>(abs(__x));
   705		const auto __inf
   706		  = __bit_cast<rebind_simd_t<__int_for_sizeof_t<_Tp>, _V>>(
   707		    _V(__infinity_v<_Tp>));
   708		const auto __iszero_inf_nan = static_simd_cast<typename _V::mask_type>(
   709		  __as_int == 0 || __as_int >= __inf);
   710	#endif
   711	
   712		const _V __scaled_subnormal = __x * __subnorm_scale;
   713		const _V __mant_subnormal
   714		  = __p5_1_exponent & (__exponent_mask | __scaled_subnormal);
   715		where(!isnormal(__x), __mant) = __mant_subnormal;
   716		where(__iszero_inf_nan, __mant) = __x;
   717		_IV __e = __extract_exponent_as_int(__scaled_subnormal);
   718		using _MaskType =
   719		  typename conditional_t<sizeof(typename _V::value_type) == sizeof(int),
   720					 _V, _IV>::mask_type;
   721		const _MaskType __value_isnormal = isnormal(__x).__cvt();
   722		where(__value_isnormal.__cvt(), __e) = __exponent_bits;
   723		static_assert(sizeof(_IV) == sizeof(__value_isnormal));
   724		const _IV __offset
   725		  = (__bit_cast<_IV>(__value_isnormal) & _IV(__exp_adjust))
   726		    | (__bit_cast<_IV>(static_simd_cast<_MaskType>(__exponent_bits == 0)
   727				       & static_simd_cast<_MaskType>(__x != 0))
   728		       & _IV(__exp_adjust + __exp_offset));
   729		*__exp = simd_cast<_Samesize<int, _V>>(__e - __offset);
   730		return __mant;
   731	      }
   732	  }
   733	
   734	// }}}
   735	_GLIBCXX_SIMD_MATH_CALL2_(ldexp, int)
   736	_GLIBCXX_SIMD_MATH_CALL_(ilogb)
   737	
   738	// logarithms {{{
   739	_GLIBCXX_SIMD_MATH_CALL_(log)
   740	_GLIBCXX_SIMD_MATH_CALL_(log10)
   741	_GLIBCXX_SIMD_MATH_CALL_(log1p)
   742	_GLIBCXX_SIMD_MATH_CALL_(log2)
   743	
   744	//}}}
   745	// logb{{{
   746	template <typename _Tp, typename _Abi>
   747	  enable_if_t<is_floating_point<_Tp>::value, simd<_Tp, _Abi>>
   748	  logb(const simd<_Tp, _Abi>& __x)
   749	  {
   750	    constexpr size_t _Np = simd_size_v<_Tp, _Abi>;
   751	    if constexpr (_Np == 1)
   752	      return std::logb(__x[0]);
   753	    else if constexpr (__is_fixed_size_abi_v<_Abi>)
   754	      {
   755		return {__private_init,
   756			__data(__x)._M_apply_per_chunk([](auto __impl, auto __xx) {
   757			  using _V = typename decltype(__impl)::simd_type;
   758			  return __data(
   759			    std::experimental::logb(_V(__private_init, __xx)));
   760			})};
   761	      }
   762	#if _GLIBCXX_SIMD_X86INTRIN // {{{
   763	    else if constexpr (__have_avx512vl && __is_sse_ps<_Tp, _Np>())
   764	      return {__private_init,
   765		      __auto_bitcast(_mm_getexp_ps(__to_intrin(__as_vector(__x))))};
   766	    else if constexpr (__have_avx512vl && __is_sse_pd<_Tp, _Np>())
   767	      return {__private_init, _mm_getexp_pd(__data(__x))};
   768	    else if constexpr (__have_avx512vl && __is_avx_ps<_Tp, _Np>())
   769	      return {__private_init, _mm256_getexp_ps(__data(__x))};
   770	    else if constexpr (__have_avx512vl && __is_avx_pd<_Tp, _Np>())
   771	      return {__private_init, _mm256_getexp_pd(__data(__x))};
   772	    else if constexpr (__have_avx512f && __is_avx_ps<_Tp, _Np>())
   773	      return {__private_init,
   774		      __lo256(_mm512_getexp_ps(__auto_bitcast(__data(__x))))};
   775	    else if constexpr (__have_avx512f && __is_avx_pd<_Tp, _Np>())
   776	      return {__private_init,
   777		      __lo256(_mm512_getexp_pd(__auto_bitcast(__data(__x))))};
   778	    else if constexpr (__is_avx512_ps<_Tp, _Np>())
   779	      return {__private_init, _mm512_getexp_ps(__data(__x))};
   780	    else if constexpr (__is_avx512_pd<_Tp, _Np>())
   781	      return {__private_init, _mm512_getexp_pd(__data(__x))};
   782	#endif // _GLIBCXX_SIMD_X86INTRIN }}}
   783	    else
   784	      {
   785		using _V = simd<_Tp, _Abi>;
   786		using namespace std::experimental::__proposed;
   787		auto __is_normal = isnormal(__x);
   788	
   789		// work on abs(__x) to reflect the return value on Linux for negative
   790		// inputs (domain-error => implementation-defined value is returned)
   791		const _V abs_x = abs(__x);
   792	
   793		// __exponent(__x) returns the exponent value (bias removed) as
   794		// simd<_Up> with integral _Up
   795		auto&& __exponent = [](const _V& __v) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
   796		  using namespace std::experimental::__proposed;
   797		  using _IV = rebind_simd_t<
   798		    conditional_t<sizeof(_Tp) == sizeof(_LLong), _LLong, int>, _V>;
   799		  return (__bit_cast<_IV>(__v) >> (__digits_v<_Tp> - 1))
   800			 - (__max_exponent_v<_Tp> - 1);
   801		};
   802		_V __r = static_simd_cast<_V>(__exponent(abs_x));
   803		if (_GLIBCXX_SIMD_IS_LIKELY(all_of(__is_normal)))
   804		  // without corner cases (nan, inf, subnormal, zero) we have our
   805		  // answer:
   806		  return __r;
   807		const auto __is_zero = __x == 0;
   808		const auto __is_nan = isnan(__x);
   809		const auto __is_inf = isinf(__x);
   810		where(__is_zero, __r) = -__infinity_v<_Tp>;
   811		where(__is_nan, __r) = __x;
   812		where(__is_inf, __r) = __infinity_v<_Tp>;
   813		__is_normal |= __is_zero || __is_nan || __is_inf;
   814		if (all_of(__is_normal))
   815		  // at this point everything but subnormals is handled
   816		  return __r;
   817		// subnormals repeat the exponent extraction after multiplication of the
   818		// input with __a floating point value that has 112 (0x70) in its exponent
   819		// (not too big for sp and large enough for dp)
   820		const _V __scaled = abs_x * _Tp(0x1p112);
   821		_V __scaled_exp = static_simd_cast<_V>(__exponent(__scaled) - 112);
   822		where(__is_normal, __scaled_exp) = __r;
   823		return __scaled_exp;
   824	      }
   825	  }
   826	
   827	//}}}
   828	template <typename _Tp, typename _Abi>
   829	  enable_if_t<is_floating_point_v<_Tp>, simd<_Tp, _Abi>>
   830	  modf(const simd<_Tp, _Abi>& __x, simd<_Tp, _Abi>* __iptr)
   831	  {
   832	    if constexpr (__is_scalar_abi<_Abi>()
   833			  || (__is_fixed_size_abi_v<
   834				_Abi> && simd_size_v<_Tp, _Abi> == 1))
   835	      {
   836		_Tp __tmp;
   837		_Tp __r = std::modf(__x[0], &__tmp);
   838		__iptr[0] = __tmp;
   839		return __r;
   840	      }
   841	    else
   842	      {
   843		const auto __integral = trunc(__x);
   844		*__iptr = __integral;
   845		auto __r = __x - __integral;
   846	#if !__FINITE_MATH_ONLY__
   847		where(isinf(__x), __r) = _Tp();
   848	#endif
   849		return copysign(__r, __x);
   850	      }
   851	  }
   852	
   853	_GLIBCXX_SIMD_MATH_CALL2_(scalbn, int)
   854	_GLIBCXX_SIMD_MATH_CALL2_(scalbln, long)
   855	
   856	_GLIBCXX_SIMD_MATH_CALL_(cbrt)
   857	
   858	_GLIBCXX_SIMD_MATH_CALL_(abs)
   859	_GLIBCXX_SIMD_MATH_CALL_(fabs)
   860	
   861	// [parallel.simd.math] only asks for is_floating_point_v<_Tp> and forgot to
   862	// allow signed integral _Tp
   863	template <typename _Tp, typename _Abi>
   864	  enable_if_t<!is_floating_point_v<_Tp> && is_signed_v<_Tp>, simd<_Tp, _Abi>>
   865	  abs(const simd<_Tp, _Abi>& __x)
   866	  { return {__private_init, _Abi::_SimdImpl::_S_abs(__data(__x))}; }
   867	
   868	template <typename _Tp, typename _Abi>
   869	  enable_if_t<!is_floating_point_v<_Tp> && is_signed_v<_Tp>, simd<_Tp, _Abi>>
   870	  fabs(const simd<_Tp, _Abi>& __x)
   871	  { return {__private_init, _Abi::_SimdImpl::_S_abs(__data(__x))}; }
   872	
   873	// the following are overloads for functions in <cstdlib> and not covered by
   874	// [parallel.simd.math]. I don't see much value in making them work, though
   875	/*
   876	template <typename _Abi> simd<long, _Abi> labs(const simd<long, _Abi> &__x)
   877	{ return {__private_init, _Abi::_SimdImpl::abs(__data(__x))}; }
   878	
   879	template <typename _Abi> simd<long long, _Abi> llabs(const simd<long long, _Abi>
   880	&__x)
   881	{ return {__private_init, _Abi::_SimdImpl::abs(__data(__x))}; }
   882	*/
   883	
   884	#define _GLIBCXX_SIMD_CVTING2(_NAME)                                           \
   885	template <typename _Tp, typename _Abi>                                         \
   886	  _GLIBCXX_SIMD_INTRINSIC simd<_Tp, _Abi> _NAME(                               \
   887	    const simd<_Tp, _Abi>& __x, const __type_identity_t<simd<_Tp, _Abi>>& __y) \
   888	  {                                                                            \
   889	    return _NAME(__x, __y);                                                    \
   890	  }                                                                            \
   891	                                                                               \
   892	template <typename _Tp, typename _Abi>                                         \
   893	  _GLIBCXX_SIMD_INTRINSIC simd<_Tp, _Abi> _NAME(                               \
   894	    const __type_identity_t<simd<_Tp, _Abi>>& __x, const simd<_Tp, _Abi>& __y) \
   895	  {                                                                            \
   896	    return _NAME(__x, __y);                                                    \
   897	  }
   898	
   899	#define _GLIBCXX_SIMD_CVTING3(_NAME)                                           \
   900	template <typename _Tp, typename _Abi>                                         \
   901	  _GLIBCXX_SIMD_INTRINSIC simd<_Tp, _Abi> _NAME(                               \
   902	    const __type_identity_t<simd<_Tp, _Abi>>& __x, const simd<_Tp, _Abi>& __y, \
   903	    const simd<_Tp, _Abi>& __z)                                                \
   904	  {                                                                            \
   905	    return _NAME(__x, __y, __z);                                               \
   906	  }                                                                            \
   907	                                                                               \
   908	template <typename _Tp, typename _Abi>                                         \
   909	  _GLIBCXX_SIMD_INTRINSIC simd<_Tp, _Abi> _NAME(                               \
   910	    const simd<_Tp, _Abi>& __x, const __type_identity_t<simd<_Tp, _Abi>>& __y, \
   911	    const simd<_Tp, _Abi>& __z)                                                \
   912	  {                                                                            \
   913	    return _NAME(__x, __y, __z);                                               \
   914	  }                                                                            \
   915	                                                                               \
   916	template <typename _Tp, typename _Abi>                                         \
   917	  _GLIBCXX_SIMD_INTRINSIC simd<_Tp, _Abi> _NAME(                               \
   918	    const simd<_Tp, _Abi>& __x, const simd<_Tp, _Abi>& __y,                    \
   919	    const __type_identity_t<simd<_Tp, _Abi>>& __z)                             \
   920	  {                                                                            \
   921	    return _NAME(__x, __y, __z);                                               \
   922	  }                                                                            \
   923	                                                                               \
   924	template <typename _Tp, typename _Abi>                                         \
   925	  _GLIBCXX_SIMD_INTRINSIC simd<_Tp, _Abi> _NAME(                               \
   926	    const simd<_Tp, _Abi>& __x, const __type_identity_t<simd<_Tp, _Abi>>& __y, \
   927	    const __type_identity_t<simd<_Tp, _Abi>>& __z)                             \
   928	  {                                                                            \
   929	    return _NAME(__x, __y, __z);                                               \
   930	  }                                                                            \
   931	                                                                               \
   932	template <typename _Tp, typename _Abi>                                         \
   933	  _GLIBCXX_SIMD_INTRINSIC simd<_Tp, _Abi> _NAME(                               \
   934	    const __type_identity_t<simd<_Tp, _Abi>>& __x, const simd<_Tp, _Abi>& __y, \
   935	    const __type_identity_t<simd<_Tp, _Abi>>& __z)                             \
   936	  {                                                                            \
   937	    return _NAME(__x, __y, __z);                                               \
   938	  }                                                                            \
   939	                                                                               \
   940	template <typename _Tp, typename _Abi>                                         \
   941	  _GLIBCXX_SIMD_INTRINSIC simd<_Tp, _Abi> _NAME(                               \
   942	    const __type_identity_t<simd<_Tp, _Abi>>& __x,                             \
   943	    const __type_identity_t<simd<_Tp, _Abi>>& __y, const simd<_Tp, _Abi>& __z) \
   944	  {                                                                            \
   945	    return _NAME(__x, __y, __z);                                               \
   946	  }
   947	
   948	template <typename _R, typename _ToApply, typename _Tp, typename... _Tps>
   949	  _GLIBCXX_SIMD_INTRINSIC _R
   950	  __fixed_size_apply(_ToApply&& __apply, const _Tp& __arg0,
   951			     const _Tps&... __args)
   952	  {
   953	    return {__private_init,
   954		    __data(__arg0)._M_apply_per_chunk(
   955		      [&](auto __impl, const auto&... __inner) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
   956			using _V = typename decltype(__impl)::simd_type;
   957			return __data(__apply(_V(__private_init, __inner)...));
   958		      },
   959		      __data(__args)...)};
   960	  }
   961	
   962	template <typename _VV>
   963	  __remove_cvref_t<_VV>
   964	  __hypot(_VV __x, _VV __y)
   965	  {
   966	    using _V = __remove_cvref_t<_VV>;
   967	    using _Tp = typename _V::value_type;
   968	    if constexpr (_V::size() == 1)
   969	      return std::hypot(_Tp(__x[0]), _Tp(__y[0]));
   970	    else if constexpr (__is_fixed_size_abi_v<typename _V::abi_type>)
   971	      {
   972		return __fixed_size_apply<_V>([](auto __a,
   973						 auto __b) { return hypot(__a, __b); },
   974					      __x, __y);
   975	      }
   976	    else
   977	      {
   978		// A simple solution for _Tp == float would be to cast to double and
   979		// simply calculate sqrt(x²+y²) as it can't over-/underflow anymore with
   980		// dp. It still needs the Annex F fixups though and isn't faster on
   981		// Skylake-AVX512 (not even for SSE and AVX vectors, and really bad for
   982		// AVX-512).
   983		using namespace __float_bitwise_operators;
   984		_V __absx = abs(__x);          // no error
   985		_V __absy = abs(__y);          // no error
   986		_V __hi = max(__absx, __absy); // no error
   987		_V __lo = min(__absy, __absx); // no error
   988	
   989		// round __hi down to the next power-of-2:
   990		_GLIBCXX_SIMD_USE_CONSTEXPR_API _V __inf(__infinity_v<_Tp>);
   991	
   992	#ifndef __FAST_MATH__
   993		if constexpr (__have_neon && !__have_neon_a32)
   994		  { // With ARMv7 NEON, we have no subnormals and must use slightly
   995		    // different strategy
   996		    const _V __hi_exp = __hi & __inf;
   997		    _V __scale_back = __hi_exp;
   998		    // For large exponents (max & max/2) the inversion comes too close
   999		    // to subnormals. Subtract 3 from the exponent:
  1000		    where(__hi_exp > 1, __scale_back) = __hi_exp * _Tp(0.125);
  1001		    // Invert and adjust for the off-by-one error of inversion via xor:
  1002		    const _V __scale = (__scale_back ^ __inf) * _Tp(.5);
  1003		    const _V __h1 = __hi * __scale;
  1004		    const _V __l1 = __lo * __scale;
  1005		    _V __r = __scale_back * sqrt(__h1 * __h1 + __l1 * __l1);
  1006		    // Fix up hypot(0, 0) to not be NaN:
  1007		    where(__hi == 0, __r) = 0;
  1008		    return __r;
  1009		  }
  1010	#endif
  1011	
  1012	#ifdef __FAST_MATH__
  1013		// With fast-math, ignore precision of subnormals and inputs from
  1014		// __finite_max_v/2 to __finite_max_v. This removes all
  1015		// branching/masking.
  1016		if constexpr (true)
  1017	#else
  1018		if (_GLIBCXX_SIMD_IS_LIKELY(all_of(isnormal(__x))
  1019					    && all_of(isnormal(__y))))
  1020	#endif
  1021		  {
  1022		    const _V __hi_exp = __hi & __inf;
  1023		    //((__hi + __hi) & __inf) ^ __inf almost works for computing
  1024		    //__scale,
  1025		    // except when (__hi + __hi) & __inf == __inf, in which case __scale
  1026		    // becomes 0 (should be min/2 instead) and thus loses the
  1027		    // information from __lo.
  1028	#ifdef __FAST_MATH__
  1029		    using _Ip = __int_for_sizeof_t<_Tp>;
  1030		    using _IV = rebind_simd_t<_Ip, _V>;
  1031		    const auto __as_int = __bit_cast<_IV>(__hi_exp);
  1032		    const _V __scale
  1033		      = __bit_cast<_V>(2 * __bit_cast<_Ip>(_Tp(1)) - __as_int);
  1034	#else
  1035		    const _V __scale = (__hi_exp ^ __inf) * _Tp(.5);
  1036	#endif
  1037		    _GLIBCXX_SIMD_USE_CONSTEXPR_API _V __mant_mask
  1038		      = __norm_min_v<_Tp> - __denorm_min_v<_Tp>;
  1039		    const _V __h1 = (__hi & __mant_mask) | _V(1);
  1040		    const _V __l1 = __lo * __scale;
  1041		    return __hi_exp * sqrt(__h1 * __h1 + __l1 * __l1);
  1042		  }
  1043		else
  1044		  {
  1045		    // slower path to support subnormals
  1046		    // if __hi is subnormal, avoid scaling by inf & final mul by 0
  1047		    // (which yields NaN) by using min()
  1048		    _V __scale = _V(1 / __norm_min_v<_Tp>);
  1049		    // invert exponent w/o error and w/o using the slow divider unit:
  1050		    // xor inverts the exponent but off by 1. Multiplication with .5
  1051		    // adjusts for the discrepancy.
  1052		    where(__hi >= __norm_min_v<_Tp>, __scale)
  1053		      = ((__hi & __inf) ^ __inf) * _Tp(.5);
  1054		    // adjust final exponent for subnormal inputs
  1055		    _V __hi_exp = __norm_min_v<_Tp>;
  1056		    where(__hi >= __norm_min_v<_Tp>, __hi_exp)
  1057		      = __hi & __inf;         // no error
  1058		    _V __h1 = __hi * __scale; // no error
  1059		    _V __l1 = __lo * __scale; // no error
  1060	
  1061		    // sqrt(x²+y²) = e*sqrt((x/e)²+(y/e)²):
  1062		    // this ensures no overflow in the argument to sqrt
  1063		    _V __r = __hi_exp * sqrt(__h1 * __h1 + __l1 * __l1);
  1064	#ifdef __STDC_IEC_559__
  1065		    // fixup for Annex F requirements
  1066		    // the naive fixup goes like this:
  1067		    //
  1068		    // where(__l1 == 0, __r)                      = __hi;
  1069		    // where(isunordered(__x, __y), __r)          = __quiet_NaN_v<_Tp>;
  1070		    // where(isinf(__absx) || isinf(__absy), __r) = __inf;
  1071		    //
  1072		    // The fixup can be prepared in parallel with the sqrt, requiring a
  1073		    // single blend step after hi_exp * sqrt, reducing latency and
  1074		    // throughput:
  1075		    _V __fixup = __hi; // __lo == 0
  1076		    where(isunordered(__x, __y), __fixup) = __quiet_NaN_v<_Tp>;
  1077		    where(isinf(__absx) || isinf(__absy), __fixup) = __inf;
  1078		    where(!(__lo == 0 || isunordered(__x, __y)
  1079			    || (isinf(__absx) || isinf(__absy))),
  1080			  __fixup)
  1081		      = __r;
  1082		    __r = __fixup;
  1083	#endif
  1084		    return __r;
  1085		  }
  1086	      }
  1087	  }
  1088	
  1089	template <typename _Tp, typename _Abi>
  1090	  _GLIBCXX_SIMD_INTRINSIC simd<_Tp, _Abi>
  1091	  hypot(const simd<_Tp, _Abi>& __x, const simd<_Tp, _Abi>& __y)
  1092	  {
  1093	    return __hypot<conditional_t<__is_fixed_size_abi_v<_Abi>,
  1094					 const simd<_Tp, _Abi>&, simd<_Tp, _Abi>>>(__x,
  1095										   __y);
  1096	  }
  1097	
  1098	_GLIBCXX_SIMD_CVTING2(hypot)
  1099	
  1100	  template <typename _VV>
  1101	  __remove_cvref_t<_VV>
  1102	  __hypot(_VV __x, _VV __y, _VV __z)
  1103	  {
  1104	    using _V = __remove_cvref_t<_VV>;
  1105	    using _Abi = typename _V::abi_type;
  1106	    using _Tp = typename _V::value_type;
  1107	    /* FIXME: enable after PR77776 is resolved
  1108	    if constexpr (_V::size() == 1)
  1109	      return std::hypot(_Tp(__x[0]), _Tp(__y[0]), _Tp(__z[0]));
  1110	    else
  1111	    */
  1112	    if constexpr (__is_fixed_size_abi_v<_Abi> && _V::size() > 1)
  1113	      {
  1114		return __fixed_size_apply<simd<_Tp, _Abi>>(
  1115			 [](auto __a, auto __b, auto __c) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
  1116			   return hypot(__a, __b, __c);
  1117			 }, __x, __y, __z);
  1118	      }
  1119	    else
  1120	      {
  1121		using namespace __float_bitwise_operators;
  1122		const _V __absx = abs(__x);                 // no error
  1123		const _V __absy = abs(__y);                 // no error
  1124		const _V __absz = abs(__z);                 // no error
  1125		_V __hi = max(max(__absx, __absy), __absz); // no error
  1126		_V __l0 = min(__absz, max(__absx, __absy)); // no error
  1127		_V __l1 = min(__absy, __absx);              // no error
  1128		if constexpr (__digits_v<_Tp> == 64 && __max_exponent_v<_Tp> == 0x4000
  1129			      && __min_exponent_v<_Tp> == -0x3FFD && _V::size() == 1)
  1130		  { // Seems like x87 fp80, where bit 63 is always 1 unless subnormal or
  1131		    // NaN. In this case the bit-tricks don't work, they require IEC559
  1132		    // binary32 or binary64 format.
  1133	#ifdef __STDC_IEC_559__
  1134		    // fixup for Annex F requirements
  1135		    if (isinf(__absx[0]) || isinf(__absy[0]) || isinf(__absz[0]))
  1136		      return __infinity_v<_Tp>;
  1137		    else if (isunordered(__absx[0], __absy[0] + __absz[0]))
  1138		      return __quiet_NaN_v<_Tp>;
  1139		    else if (__l0[0] == 0 && __l1[0] == 0)
  1140		      return __hi;
  1141	#endif
  1142		    _V __hi_exp = __hi;
  1143		    const _ULLong __tmp = 0x8000'0000'0000'0000ull;
  1144		    __builtin_memcpy(&__data(__hi_exp), &__tmp, 8);
  1145		    const _V __scale = 1 / __hi_exp;
  1146		    __hi *= __scale;
  1147		    __l0 *= __scale;
  1148		    __l1 *= __scale;
  1149		    return __hi_exp * sqrt((__l0 * __l0 + __l1 * __l1) + __hi * __hi);
  1150		  }
  1151		else
  1152		  {
  1153		    // round __hi down to the next power-of-2:
  1154		    _GLIBCXX_SIMD_USE_CONSTEXPR_API _V __inf(__infinity_v<_Tp>);
  1155	
  1156	#ifndef __FAST_MATH__
  1157		    if constexpr (_V::size() > 1 && __have_neon && !__have_neon_a32)
  1158		      { // With ARMv7 NEON, we have no subnormals and must use slightly
  1159			// different strategy
  1160			const _V __hi_exp = __hi & __inf;
  1161			_V __scale_back = __hi_exp;
  1162			// For large exponents (max & max/2) the inversion comes too
  1163			// close to subnormals. Subtract 3 from the exponent:
  1164			where(__hi_exp > 1, __scale_back) = __hi_exp * _Tp(0.125);
  1165			// Invert and adjust for the off-by-one error of inversion via
  1166			// xor:
  1167			const _V __scale = (__scale_back ^ __inf) * _Tp(.5);
  1168			const _V __h1 = __hi * __scale;
  1169			__l0 *= __scale;
  1170			__l1 *= __scale;
  1171			_V __lo = __l0 * __l0
  1172				  + __l1 * __l1; // add the two smaller values first
  1173			asm("" : "+m"(__lo));
  1174			_V __r = __scale_back * sqrt(__h1 * __h1 + __lo);
  1175			// Fix up hypot(0, 0, 0) to not be NaN:
  1176			where(__hi == 0, __r) = 0;
  1177			return __r;
  1178		      }
  1179	#endif
  1180	
  1181	#ifdef __FAST_MATH__
  1182		    // With fast-math, ignore precision of subnormals and inputs from
  1183		    // __finite_max_v/2 to __finite_max_v. This removes all
  1184		    // branching/masking.
  1185		    if constexpr (true)
  1186	#else
  1187		    if (_GLIBCXX_SIMD_IS_LIKELY(all_of(isnormal(__x))
  1188						&& all_of(isnormal(__y))
  1189						&& all_of(isnormal(__z))))
  1190	#endif
  1191		      {
  1192			const _V __hi_exp = __hi & __inf;
  1193			//((__hi + __hi) & __inf) ^ __inf almost works for computing
  1194			//__scale, except when (__hi + __hi) & __inf == __inf, in which
  1195			// case __scale
  1196			// becomes 0 (should be min/2 instead) and thus loses the
  1197			// information from __lo.
  1198	#ifdef __FAST_MATH__
  1199			using _Ip = __int_for_sizeof_t<_Tp>;
  1200			using _IV = rebind_simd_t<_Ip, _V>;
  1201			const auto __as_int = __bit_cast<_IV>(__hi_exp);
  1202			const _V __scale
  1203			  = __bit_cast<_V>(2 * __bit_cast<_Ip>(_Tp(1)) - __as_int);
  1204	#else
  1205			const _V __scale = (__hi_exp ^ __inf) * _Tp(.5);
  1206	#endif
  1207			constexpr _Tp __mant_mask
  1208			  = __norm_min_v<_Tp> - __denorm_min_v<_Tp>;
  1209			const _V __h1 = (__hi & _V(__mant_mask)) | _V(1);
  1210			__l0 *= __scale;
  1211			__l1 *= __scale;
  1212			const _V __lo
  1213			  = __l0 * __l0
  1214			    + __l1 * __l1; // add the two smaller values first
  1215			return __hi_exp * sqrt(__lo + __h1 * __h1);
  1216		      }
  1217		    else
  1218		      {
  1219			// slower path to support subnormals
  1220			// if __hi is subnormal, avoid scaling by inf & final mul by 0
  1221			// (which yields NaN) by using min()
  1222			_V __scale = _V(1 / __norm_min_v<_Tp>);
  1223			// invert exponent w/o error and w/o using the slow divider
  1224			// unit: xor inverts the exponent but off by 1. Multiplication
  1225			// with .5 adjusts for the discrepancy.
  1226			where(__hi >= __norm_min_v<_Tp>, __scale)
  1227			  = ((__hi & __inf) ^ __inf) * _Tp(.5);
  1228			// adjust final exponent for subnormal inputs
  1229			_V __hi_exp = __norm_min_v<_Tp>;
  1230			where(__hi >= __norm_min_v<_Tp>, __hi_exp)
  1231			  = __hi & __inf;         // no error
  1232			_V __h1 = __hi * __scale; // no error
  1233			__l0 *= __scale;          // no error
  1234			__l1 *= __scale;          // no error
  1235			_V __lo = __l0 * __l0
  1236				  + __l1 * __l1; // add the two smaller values first
  1237			_V __r = __hi_exp * sqrt(__lo + __h1 * __h1);
  1238	#ifdef __STDC_IEC_559__
  1239			// fixup for Annex F requirements
  1240			_V __fixup = __hi; // __lo == 0
  1241			// where(__lo == 0, __fixup)                   = __hi;
  1242			where(isunordered(__x, __y + __z), __fixup)
  1243			  = __quiet_NaN_v<_Tp>;
  1244			where(isinf(__absx) || isinf(__absy) || isinf(__absz), __fixup)
  1245			  = __inf;
  1246			// Instead of __lo == 0, the following could depend on __h1² ==
  1247			// __h1² + __lo (i.e. __hi is so much larger than the other two
  1248			// inputs that the result is exactly __hi). While this may
  1249			// improve precision, it is likely to reduce efficiency if the
  1250			// ISA has FMAs (because __h1² + __lo is an FMA, but the
  1251			// intermediate
  1252			// __h1² must be kept)
  1253			where(!(__lo == 0 || isunordered(__x, __y + __z)
  1254				|| isinf(__absx) || isinf(__absy) || isinf(__absz)),
  1255			      __fixup)
  1256			  = __r;
  1257			__r = __fixup;
  1258	#endif
  1259			return __r;
  1260		      }
  1261		  }
  1262	      }
  1263	  }
  1264	
  1265	  template <typename _Tp, typename _Abi>
  1266	  _GLIBCXX_SIMD_INTRINSIC simd<_Tp, _Abi>
  1267	  hypot(const simd<_Tp, _Abi>& __x, const simd<_Tp, _Abi>& __y,
  1268		const simd<_Tp, _Abi>& __z)
  1269	  {
  1270	    return __hypot<conditional_t<__is_fixed_size_abi_v<_Abi>,
  1271					 const simd<_Tp, _Abi>&, simd<_Tp, _Abi>>>(__x,
  1272										   __y,
  1273										   __z);
  1274	  }
  1275	
  1276	_GLIBCXX_SIMD_CVTING3(hypot)
  1277	
  1278	_GLIBCXX_SIMD_MATH_CALL2_(pow, _Tp)
  1279	
  1280	_GLIBCXX_SIMD_MATH_CALL_(sqrt)
  1281	_GLIBCXX_SIMD_MATH_CALL_(erf)
  1282	_GLIBCXX_SIMD_MATH_CALL_(erfc)
  1283	_GLIBCXX_SIMD_MATH_CALL_(lgamma)
  1284	_GLIBCXX_SIMD_MATH_CALL_(tgamma)
  1285	_GLIBCXX_SIMD_MATH_CALL_(ceil)
  1286	_GLIBCXX_SIMD_MATH_CALL_(floor)
  1287	_GLIBCXX_SIMD_MATH_CALL_(nearbyint)
  1288	_GLIBCXX_SIMD_MATH_CALL_(rint)
  1289	_GLIBCXX_SIMD_MATH_CALL_(lrint)
  1290	_GLIBCXX_SIMD_MATH_CALL_(llrint)
  1291	
  1292	_GLIBCXX_SIMD_MATH_CALL_(round)
  1293	_GLIBCXX_SIMD_MATH_CALL_(lround)
  1294	_GLIBCXX_SIMD_MATH_CALL_(llround)
  1295	
  1296	_GLIBCXX_SIMD_MATH_CALL_(trunc)
  1297	
  1298	_GLIBCXX_SIMD_MATH_CALL2_(fmod, _Tp)
  1299	_GLIBCXX_SIMD_MATH_CALL2_(remainder, _Tp)
  1300	_GLIBCXX_SIMD_MATH_CALL3_(remquo, _Tp, int*)
  1301	
  1302	template <typename _Tp, typename _Abi>
  1303	  enable_if_t<is_floating_point_v<_Tp>, simd<_Tp, _Abi>>
  1304	  copysign(const simd<_Tp, _Abi>& __x, const simd<_Tp, _Abi>& __y)
  1305	  {
  1306	    if constexpr (simd_size_v<_Tp, _Abi> == 1)
  1307	      return std::copysign(__x[0], __y[0]);
  1308	    else if constexpr (is_same_v<_Tp, long double> && sizeof(_Tp) == 12)
  1309	      // Remove this case once __bit_cast is implemented via __builtin_bit_cast.
  1310	      // It is necessary, because __signmask below cannot be computed at compile
  1311	      // time.
  1312	      return simd<_Tp, _Abi>(
  1313		[&](auto __i) { return std::copysign(__x[__i], __y[__i]); });
  1314	    else
  1315	      {
  1316		using _V = simd<_Tp, _Abi>;
  1317		using namespace std::experimental::__float_bitwise_operators;
  1318		_GLIBCXX_SIMD_USE_CONSTEXPR_API auto __signmask = _V(1) ^ _V(-1);
  1319		return (__x & (__x ^ __signmask)) | (__y & __signmask);
  1320	      }
  1321	  }
  1322	
  1323	_GLIBCXX_SIMD_MATH_CALL2_(nextafter, _Tp)
  1324	// not covered in [parallel.simd.math]:
  1325	// _GLIBCXX_SIMD_MATH_CALL2_(nexttoward, long double)
  1326	_GLIBCXX_SIMD_MATH_CALL2_(fdim, _Tp)
  1327	_GLIBCXX_SIMD_MATH_CALL2_(fmax, _Tp)
  1328	_GLIBCXX_SIMD_MATH_CALL2_(fmin, _Tp)
  1329	
  1330	_GLIBCXX_SIMD_MATH_CALL3_(fma, _Tp, _Tp)
  1331	_GLIBCXX_SIMD_MATH_CALL_(fpclassify)
  1332	_GLIBCXX_SIMD_MATH_CALL_(isfinite)
  1333	
  1334	// isnan and isinf require special treatment because old glibc may declare
  1335	// `int isinf(double)`.
  1336	template <typename _Tp, typename _Abi, typename...,
  1337		  typename _R = _Math_return_type_t<bool, _Tp, _Abi>>
  1338	  enable_if_t<is_floating_point_v<_Tp>, _R>
  1339	  isinf(simd<_Tp, _Abi> __x)
  1340	  { return {__private_init, _Abi::_SimdImpl::_S_isinf(__data(__x))}; }
  1341	
  1342	template <typename _Tp, typename _Abi, typename...,
  1343		  typename _R = _Math_return_type_t<bool, _Tp, _Abi>>
  1344	  enable_if_t<is_floating_point_v<_Tp>, _R>
  1345	  isnan(simd<_Tp, _Abi> __x)
  1346	  { return {__private_init, _Abi::_SimdImpl::_S_isnan(__data(__x))}; }
  1347	
  1348	_GLIBCXX_SIMD_MATH_CALL_(isnormal)
  1349	
  1350	template <typename..., typename _Tp, typename _Abi>
  1351	  simd_mask<_Tp, _Abi>
  1352	  signbit(simd<_Tp, _Abi> __x)
  1353	  {
  1354	    if constexpr (is_integral_v<_Tp>)
  1355	      {
  1356		if constexpr (is_unsigned_v<_Tp>)
  1357		  return simd_mask<_Tp, _Abi>{}; // false
  1358		else
  1359		  return __x < 0;
  1360	      }
  1361	    else
  1362	      return {__private_init, _Abi::_SimdImpl::_S_signbit(__data(__x))};
  1363	  }
  1364	
  1365	_GLIBCXX_SIMD_MATH_CALL2_(isgreater, _Tp)
  1366	_GLIBCXX_SIMD_MATH_CALL2_(isgreaterequal, _Tp)
  1367	_GLIBCXX_SIMD_MATH_CALL2_(isless, _Tp)
  1368	_GLIBCXX_SIMD_MATH_CALL2_(islessequal, _Tp)
  1369	_GLIBCXX_SIMD_MATH_CALL2_(islessgreater, _Tp)
  1370	_GLIBCXX_SIMD_MATH_CALL2_(isunordered, _Tp)
  1371	
  1372	/* not covered in [parallel.simd.math]
  1373	template <typename _Abi> __doublev<_Abi> nan(const char* tagp);
  1374	template <typename _Abi> __floatv<_Abi> nanf(const char* tagp);
  1375	template <typename _Abi> __ldoublev<_Abi> nanl(const char* tagp);
  1376	
  1377	template <typename _V> struct simd_div_t {
  1378	    _V quot, rem;
  1379	};
  1380	
  1381	template <typename _Abi>
  1382	simd_div_t<_SCharv<_Abi>> div(_SCharv<_Abi> numer,
  1383						 _SCharv<_Abi> denom);
  1384	template <typename _Abi>
  1385	simd_div_t<__shortv<_Abi>> div(__shortv<_Abi> numer,
  1386						 __shortv<_Abi> denom);
  1387	template <typename _Abi>
  1388	simd_div_t<__intv<_Abi>> div(__intv<_Abi> numer, __intv<_Abi> denom);
  1389	template <typename _Abi>
  1390	simd_div_t<__longv<_Abi>> div(__longv<_Abi> numer,
  1391						__longv<_Abi> denom);
  1392	template <typename _Abi>
  1393	simd_div_t<__llongv<_Abi>> div(__llongv<_Abi> numer,
  1394						 __llongv<_Abi> denom);
  1395	*/
  1396	
  1397	// special math {{{
  1398	template <typename _Tp, typename _Abi>
  1399	  enable_if_t<is_floating_point_v<_Tp>, simd<_Tp, _Abi>>
  1400	  assoc_laguerre(const fixed_size_simd<unsigned, simd_size_v<_Tp, _Abi>>& __n,
  1401			 const fixed_size_simd<unsigned, simd_size_v<_Tp, _Abi>>& __m,
  1402			 const simd<_Tp, _Abi>& __x)
  1403	  {
  1404	    return simd<_Tp, _Abi>([&](auto __i) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
  1405		     return std::assoc_laguerre(__n[__i], __m[__i], __x[__i]);
  1406		   });
  1407	  }
  1408	
  1409	template <typename _Tp, typename _Abi>
  1410	  enable_if_t<is_floating_point_v<_Tp>, simd<_Tp, _Abi>>
  1411	  assoc_legendre(const fixed_size_simd<unsigned, simd_size_v<_Tp, _Abi>>& __n,
  1412			 const fixed_size_simd<unsigned, simd_size_v<_Tp, _Abi>>& __m,
  1413			 const simd<_Tp, _Abi>& __x)
  1414	  {
  1415	    return simd<_Tp, _Abi>([&](auto __i) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
  1416		     return std::assoc_legendre(__n[__i], __m[__i], __x[__i]);
  1417		   });
  1418	  }
  1419	
  1420	_GLIBCXX_SIMD_MATH_CALL2_(beta, _Tp)
  1421	_GLIBCXX_SIMD_MATH_CALL_(comp_ellint_1)
  1422	_GLIBCXX_SIMD_MATH_CALL_(comp_ellint_2)
  1423	_GLIBCXX_SIMD_MATH_CALL2_(comp_ellint_3, _Tp)
  1424	_GLIBCXX_SIMD_MATH_CALL2_(cyl_bessel_i, _Tp)
  1425	_GLIBCXX_SIMD_MATH_CALL2_(cyl_bessel_j, _Tp)
  1426	_GLIBCXX_SIMD_MATH_CALL2_(cyl_bessel_k, _Tp)
  1427	_GLIBCXX_SIMD_MATH_CALL2_(cyl_neumann, _Tp)
  1428	_GLIBCXX_SIMD_MATH_CALL2_(ellint_1, _Tp)
  1429	_GLIBCXX_SIMD_MATH_CALL2_(ellint_2, _Tp)
  1430	_GLIBCXX_SIMD_MATH_CALL3_(ellint_3, _Tp, _Tp)
  1431	_GLIBCXX_SIMD_MATH_CALL_(expint)
  1432	
  1433	template <typename _Tp, typename _Abi>
  1434	  enable_if_t<is_floating_point_v<_Tp>, simd<_Tp, _Abi>>
  1435	  hermite(const fixed_size_simd<unsigned, simd_size_v<_Tp, _Abi>>& __n,
  1436		  const simd<_Tp, _Abi>& __x)
  1437	  {
  1438	    return simd<_Tp, _Abi>([&](auto __i) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
  1439		     return std::hermite(__n[__i], __x[__i]);
  1440		   });
  1441	  }
  1442	
  1443	template <typename _Tp, typename _Abi>
  1444	  enable_if_t<is_floating_point_v<_Tp>, simd<_Tp, _Abi>>
  1445	  laguerre(const fixed_size_simd<unsigned, simd_size_v<_Tp, _Abi>>& __n,
  1446		   const simd<_Tp, _Abi>& __x)
  1447	  {
  1448	    return simd<_Tp, _Abi>([&](auto __i) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
  1449		     return std::laguerre(__n[__i], __x[__i]);
  1450		   });
  1451	  }
  1452	
  1453	template <typename _Tp, typename _Abi>
  1454	  enable_if_t<is_floating_point_v<_Tp>, simd<_Tp, _Abi>>
  1455	  legendre(const fixed_size_simd<unsigned, simd_size_v<_Tp, _Abi>>& __n,
  1456		   const simd<_Tp, _Abi>& __x)
  1457	  {
  1458	    return simd<_Tp, _Abi>([&](auto __i) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
  1459		     return std::legendre(__n[__i], __x[__i]);
  1460		   });
  1461	  }
  1462	
  1463	_GLIBCXX_SIMD_MATH_CALL_(riemann_zeta)
  1464	
  1465	template <typename _Tp, typename _Abi>
  1466	  enable_if_t<is_floating_point_v<_Tp>, simd<_Tp, _Abi>>
  1467	  sph_bessel(const fixed_size_simd<unsigned, simd_size_v<_Tp, _Abi>>& __n,
  1468		     const simd<_Tp, _Abi>& __x)
  1469	  {
  1470	    return simd<_Tp, _Abi>([&](auto __i) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
  1471		     return std::sph_bessel(__n[__i], __x[__i]);
  1472		   });
  1473	  }
  1474	
  1475	template <typename _Tp, typename _Abi>
  1476	  enable_if_t<is_floating_point_v<_Tp>, simd<_Tp, _Abi>>
  1477	  sph_legendre(const fixed_size_simd<unsigned, simd_size_v<_Tp, _Abi>>& __l,
  1478		       const fixed_size_simd<unsigned, simd_size_v<_Tp, _Abi>>& __m,
  1479		       const simd<_Tp, _Abi>& theta)
  1480	  {
  1481	    return simd<_Tp, _Abi>([&](auto __i) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
  1482		     return std::assoc_legendre(__l[__i], __m[__i], theta[__i]);
  1483		   });
  1484	  }
  1485	
  1486	template <typename _Tp, typename _Abi>
  1487	  enable_if_t<is_floating_point_v<_Tp>, simd<_Tp, _Abi>>
  1488	  sph_neumann(const fixed_size_simd<unsigned, simd_size_v<_Tp, _Abi>>& __n,
  1489		      const simd<_Tp, _Abi>& __x)
  1490	  {
  1491	    return simd<_Tp, _Abi>([&](auto __i) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
  1492		     return std::sph_neumann(__n[__i], __x[__i]);
  1493		   });
  1494	  }
  1495	// }}}
  1496	
  1497	#undef _GLIBCXX_SIMD_MATH_CALL_
  1498	#undef _GLIBCXX_SIMD_MATH_CALL2_
  1499	#undef _GLIBCXX_SIMD_MATH_CALL3_
  1500	
  1501	_GLIBCXX_SIMD_END_NAMESPACE
  1502	
  1503	#endif // __cplusplus >= 201703L
  1504	#endif // _GLIBCXX_EXPERIMENTAL_SIMD_MATH_H_
  1505	
  1506	// vim: foldmethod=marker sw=2 ts=8 noet sts=2