The C and C++ Include Header Files
/usr/include/c++/11/experimental/bits/simd_fixed_size.h
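The listing below is GCC 11's implementation of the fixed_size ABI for std::experimental::simd (Parallelism TS v2). Before the internals, here is a brief usage sketch of the public interface this header backs; the element count, values, and compiler flags are arbitrary examples, not taken from the header:

// Compile with e.g.: g++ -std=c++17 -O2 example.cpp
#include <experimental/simd>
#include <cstdio>

namespace stdx = std::experimental;

int main()
{
  // fixed_size<7>: exactly 7 elements on every target; the header below
  // decomposes this into a _SimdTuple of native chunks behind the scenes.
  stdx::fixed_size_simd<float, 7> v([](int i) { return 1.5f * i; });
  std::printf("sum = %g\n", stdx::reduce(v)); // 1.5 * (0+1+...+6) = 31.5
}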
$ cat -n /usr/include/c++/11/experimental/bits/simd_fixed_size.h
     1  // Simd fixed_size ABI specific implementations -*- C++ -*-
     2
     3  // Copyright (C) 2020-2021 Free Software Foundation, Inc.
     4  //
     5  // This file is part of the GNU ISO C++ Library. This library is free
     6  // software; you can redistribute it and/or modify it under the
     7  // terms of the GNU General Public License as published by the
     8  // Free Software Foundation; either version 3, or (at your option)
     9  // any later version.
    10
    11  // This library is distributed in the hope that it will be useful,
    12  // but WITHOUT ANY WARRANTY; without even the implied warranty of
    13  // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
    14  // GNU General Public License for more details.
    15
    16  // Under Section 7 of GPL version 3, you are granted additional
    17  // permissions described in the GCC Runtime Library Exception, version
    18  // 3.1, as published by the Free Software Foundation.
    19
    20  // You should have received a copy of the GNU General Public License and
    21  // a copy of the GCC Runtime Library Exception along with this program;
    22  // see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
    23  // <http://www.gnu.org/licenses/>.
    24
    25  /*
    26   * The fixed_size ABI gives the following guarantees:
    27   *  - simd objects are passed via the stack
    28   *  - memory layout of `simd<_Tp, _Np>` is equivalent to `array<_Tp, _Np>`
    29   *  - alignment of `simd<_Tp, _Np>` is `_Np * sizeof(_Tp)` if _Np is __a
    30   *    power-of-2 value, otherwise `std::__bit_ceil(_Np * sizeof(_Tp))` (Note:
    31   *    if the alignment were to exceed the system/compiler maximum, it is bounded
    32   *    to that maximum)
    33   *  - simd_mask objects are passed like bitset<_Np>
    34   *  - memory layout of `simd_mask<_Tp, _Np>` is equivalent to `bitset<_Np>`
    35   *  - alignment of `simd_mask<_Tp, _Np>` is equal to the alignment of
    36   *    `bitset<_Np>`
    37   */
    38
    39  #ifndef _GLIBCXX_EXPERIMENTAL_SIMD_FIXED_SIZE_H_
    40  #define _GLIBCXX_EXPERIMENTAL_SIMD_FIXED_SIZE_H_
    41
    42  #if __cplusplus >= 201703L
    43
    44  #include <array>
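The alignment rule in the comment above can be illustrated with a small stand-alone computation. The helper below is a plain re-implementation for illustration only (not the library's std::__bit_ceil), and it ignores the cap at the system/compiler maximum alignment:

#include <cstddef>

// round a byte count up to the next power of 2 (illustrative only)
constexpr std::size_t bit_ceil_example(std::size_t x)
{
  std::size_t r = 1;
  while (r < x)
    r *= 2;
  return r;
}

// alignment of simd<T, N> under the fixed_size ABI, per the comment above
template <typename T, std::size_t N>
constexpr std::size_t fixed_size_align_example = bit_ceil_example(N * sizeof(T));

// assuming 4-byte float:
static_assert(fixed_size_align_example<float, 4> == 16); // 16 bytes is a power of 2
static_assert(fixed_size_align_example<float, 7> == 32); // 28 bytes rounds up to 32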
    45
    46  _GLIBCXX_SIMD_BEGIN_NAMESPACE
    47
    48  // __simd_tuple_element {{{
    49  template <size_t _I, typename _Tp>
    50    struct __simd_tuple_element;
    51
    52  template <typename _Tp, typename _A0, typename... _As>
    53    struct __simd_tuple_element<0, _SimdTuple<_Tp, _A0, _As...>>
    54    { using type = simd<_Tp, _A0>; };
    55
    56  template <size_t _I, typename _Tp, typename _A0, typename... _As>
    57    struct __simd_tuple_element<_I, _SimdTuple<_Tp, _A0, _As...>>
    58    { using type = typename __simd_tuple_element<_I - 1, _SimdTuple<_Tp, _As...>>::type; };
    59
    60  template <size_t _I, typename _Tp>
    61    using __simd_tuple_element_t = typename __simd_tuple_element<_I, _Tp>::type;
    62
    63  // }}}
    64  // __simd_tuple_concat {{{
    65
    66  template <typename _Tp, typename... _A0s, typename... _A1s>
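Aside on the __simd_tuple_element recursion above, before __simd_tuple_concat continues: it is the standard compile-time list-indexing pattern, peeling one ABI tag off per step until the index reaches zero. The same technique in a stand-alone form, with hypothetical names (type_list, list_element) instead of the internal ones:

#include <cstddef>
#include <type_traits>

template <typename... Ts>
struct type_list {};                           // hypothetical stand-in for _SimdTuple

template <std::size_t I, typename List>
struct list_element;                           // primary template, like __simd_tuple_element

template <typename T0, typename... Ts>
struct list_element<0, type_list<T0, Ts...>>   // index 0: take the head
{ using type = T0; };

template <std::size_t I, typename T0, typename... Ts>
struct list_element<I, type_list<T0, Ts...>>   // otherwise: drop the head, decrement
{ using type = typename list_element<I - 1, type_list<Ts...>>::type; };

static_assert(std::is_same_v<list_element<1, type_list<int, float, double>>::type, float>);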
67 _GLIBCXX_SIMD_INTRINSIC constexpr _SimdTuple<_Tp, _A0s..., _A1s...> 68 __simd_tuple_concat(const _SimdTuple<_Tp, _A0s...>& __left, 69 const _SimdTuple<_Tp, _A1s...>& __right) 70 { 71 if constexpr (sizeof...(_A0s) == 0) 72 return __right; 73 else if constexpr (sizeof...(_A1s) == 0) 74 return __left; 75 else 76 return {__left.first, __simd_tuple_concat(__left.second, __right)}; 77 } 78 79 template
80 _GLIBCXX_SIMD_INTRINSIC constexpr _SimdTuple<_Tp, simd_abi::scalar, _A10, _A1s...> 81 __simd_tuple_concat(const _Tp& __left, const _SimdTuple<_Tp, _A10, _A1s...>& __right) 82 { return {__left, __right}; } 83 84 // }}} 85 // __simd_tuple_pop_front {{{ 86 // Returns the next _SimdTuple in __x that has _Np elements less. 87 // Precondition: _Np must match the number of elements in __first (recursively) 88 template
89 _GLIBCXX_SIMD_INTRINSIC constexpr decltype(auto) 90 __simd_tuple_pop_front(_Tp&& __x) 91 { 92 if constexpr (_Np == 0) 93 return static_cast<_Tp&&>(__x); 94 else 95 { 96 using _Up = __remove_cvref_t<_Tp>; 97 static_assert(_Np >= _Up::_S_first_size); 98 return __simd_tuple_pop_front<_Np - _Up::_S_first_size>(__x.second); 99 } 100 } 101 102 // }}} 103 // __get_simd_at<_Np> {{{1 104 struct __as_simd {}; 105 106 struct __as_simd_tuple {}; 107 108 template
109 _GLIBCXX_SIMD_INTRINSIC constexpr simd<_Tp, _A0> 110 __simd_tuple_get_impl(__as_simd, const _SimdTuple<_Tp, _A0, _Abis...>& __t, _SizeConstant<0>) 111 { return {__private_init, __t.first}; } 112 113 template
114 _GLIBCXX_SIMD_INTRINSIC constexpr const auto& 115 __simd_tuple_get_impl(__as_simd_tuple, const _SimdTuple<_Tp, _A0, _Abis...>& __t, 116 _SizeConstant<0>) 117 { return __t.first; } 118 119 template
120 _GLIBCXX_SIMD_INTRINSIC constexpr auto& 121 __simd_tuple_get_impl(__as_simd_tuple, _SimdTuple<_Tp, _A0, _Abis...>& __t, _SizeConstant<0>) 122 { return __t.first; } 123 124 template
125 _GLIBCXX_SIMD_INTRINSIC constexpr auto 126 __simd_tuple_get_impl(_R, const _SimdTuple<_Tp, _Abis...>& __t, _SizeConstant<_Np>) 127 { return __simd_tuple_get_impl(_R(), __t.second, _SizeConstant<_Np - 1>()); } 128 129 template
130 _GLIBCXX_SIMD_INTRINSIC constexpr auto& 131 __simd_tuple_get_impl(__as_simd_tuple, _SimdTuple<_Tp, _Abis...>& __t, _SizeConstant<_Np>) 132 { return __simd_tuple_get_impl(__as_simd_tuple(), __t.second, _SizeConstant<_Np - 1>()); } 133 134 template
135 _GLIBCXX_SIMD_INTRINSIC constexpr auto 136 __get_simd_at(const _SimdTuple<_Tp, _Abis...>& __t) 137 { return __simd_tuple_get_impl(__as_simd(), __t, _SizeConstant<_Np>()); } 138 139 // }}} 140 // __get_tuple_at<_Np> {{{ 141 template
142 _GLIBCXX_SIMD_INTRINSIC constexpr auto 143 __get_tuple_at(const _SimdTuple<_Tp, _Abis...>& __t) 144 { return __simd_tuple_get_impl(__as_simd_tuple(), __t, _SizeConstant<_Np>()); } 145 146 template
147 _GLIBCXX_SIMD_INTRINSIC constexpr auto& 148 __get_tuple_at(_SimdTuple<_Tp, _Abis...>& __t) 149 { return __simd_tuple_get_impl(__as_simd_tuple(), __t, _SizeConstant<_Np>()); } 150 151 // __tuple_element_meta {{{1 152 template
153 struct __tuple_element_meta : public _Abi::_SimdImpl 154 { 155 static_assert(is_same_v
); // this fails e.g. when _SimdImpl is an 157 // alias for _SimdImplBuiltin<_DifferentAbi> 158 using value_type = _Tp; 159 using abi_type = _Abi; 160 using _Traits = _SimdTraits<_Tp, _Abi>; 161 using _MaskImpl = typename _Abi::_MaskImpl; 162 using _MaskMember = typename _Traits::_MaskMember; 163 using simd_type = simd<_Tp, _Abi>; 164 static constexpr size_t _S_offset = _Offset; 165 static constexpr size_t _S_size() { return simd_size<_Tp, _Abi>::value; } 166 static constexpr _MaskImpl _S_mask_impl = {}; 167 168 template
169 _GLIBCXX_SIMD_INTRINSIC static constexpr auto 170 _S_submask(_BitMask<_Np, _Sanitized> __bits) 171 { return __bits.template _M_extract<_Offset, _S_size()>(); } 172 173 template
174 _GLIBCXX_SIMD_INTRINSIC static constexpr _MaskMember 175 _S_make_mask(_BitMask<_Np, _Sanitized> __bits) 176 { 177 return _MaskImpl::template _S_convert<_Tp>( 178 __bits.template _M_extract<_Offset, _S_size()>()._M_sanitized()); 179 } 180 181 _GLIBCXX_SIMD_INTRINSIC static constexpr _ULLong 182 _S_mask_to_shifted_ullong(_MaskMember __k) 183 { return _MaskImpl::_S_to_bits(__k).to_ullong() << _Offset; } 184 }; 185 186 template
187 constexpr 188 __tuple_element_meta<_Tp, _Abi, _Offset> 189 __make_meta(const _SimdTuple<_Tp, _Abi, _As...>&) 190 { return {}; } 191 192 // }}}1 193 // _WithOffset wrapper class {{{ 194 template
195 struct _WithOffset : public _Base 196 { 197 static inline constexpr size_t _S_offset = _Offset; 198 199 _GLIBCXX_SIMD_INTRINSIC char* 200 _M_as_charptr() 201 { return reinterpret_cast
(this) + _S_offset * sizeof(typename _Base::value_type); } 202 203 _GLIBCXX_SIMD_INTRINSIC const char* 204 _M_as_charptr() const 205 { return reinterpret_cast
(this) + _S_offset * sizeof(typename _Base::value_type); } 206 }; 207 208 // make _WithOffset<_WithOffset> ill-formed to use: 209 template
210 struct _WithOffset<_O0, _WithOffset<_O1, _Base>> {}; 211 212 template
213 decltype(auto) 214 __add_offset(_Tp& __base) 215 { return static_cast<_WithOffset<_Offset, __remove_cvref_t<_Tp>>&>(__base); } 216 217 template
218 decltype(auto) 219 __add_offset(const _Tp& __base) 220 { return static_cast
>&>(__base); } 221 222 template
223 decltype(auto) 224 __add_offset(_WithOffset<_ExistingOffset, _Tp>& __base) 225 { return static_cast<_WithOffset<_Offset + _ExistingOffset, _Tp>&>(static_cast<_Tp&>(__base)); } 226 227 template
228 decltype(auto) 229 __add_offset(const _WithOffset<_ExistingOffset, _Tp>& __base) 230 { 231 return static_cast
&>( 232 static_cast
(__base)); 233 } 234 235 template
236 constexpr inline size_t __offset = 0; 237 238 template
239 constexpr inline size_t __offset<_WithOffset<_Offset, _Tp>> 240 = _WithOffset<_Offset, _Tp>::_S_offset; 241 242 template
243 constexpr inline size_t __offset
= __offset<_Tp>; 244 245 template
246 constexpr inline size_t __offset<_Tp&> = __offset<_Tp>; 247 248 template
249 constexpr inline size_t __offset<_Tp&&> = __offset<_Tp>; 250 251 // }}} 252 // _SimdTuple specializations {{{1 253 // empty {{{2 254 template
255 struct _SimdTuple<_Tp> 256 { 257 using value_type = _Tp; 258 static constexpr size_t _S_tuple_size = 0; 259 static constexpr size_t _S_size() { return 0; } 260 }; 261 262 // _SimdTupleData {{{2 263 template
264 struct _SimdTupleData 265 { 266 _FirstType first; 267 _SecondType second; 268 269 _GLIBCXX_SIMD_INTRINSIC 270 constexpr bool 271 _M_is_constprop() const 272 { 273 if constexpr (is_class_v<_FirstType>) 274 return first._M_is_constprop() && second._M_is_constprop(); 275 else 276 return __builtin_constant_p(first) && second._M_is_constprop(); 277 } 278 }; 279 280 template
281 struct _SimdTupleData<_FirstType, _SimdTuple<_Tp>> 282 { 283 _FirstType first; 284 static constexpr _SimdTuple<_Tp> second = {}; 285 286 _GLIBCXX_SIMD_INTRINSIC 287 constexpr bool 288 _M_is_constprop() const 289 { 290 if constexpr (is_class_v<_FirstType>) 291 return first._M_is_constprop(); 292 else 293 return __builtin_constant_p(first); 294 } 295 }; 296 297 // 1 or more {{{2 298 template
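Before the main _SimdTuple specialization that follows, it may help to see the head/tail idea in isolation. The sketch below uses hypothetical names (chunk_tuple, fixed element counts instead of ABI tags), but the layout principle is the same as _SimdTuple's first/second pair: one native chunk, then the rest, recursively, so the chunks' elements end up laid out one after the other. The real code additionally makes the empty tail a static member (see the _SimdTupleData specialization above) so it occupies no storage:

#include <cstddef>

template <typename T, std::size_t... Ns>
struct chunk_tuple;

template <typename T>
struct chunk_tuple<T>                      // empty tail, like _SimdTuple<_Tp>
{
  static constexpr std::size_t size() { return 0; }
};

template <typename T, std::size_t N0, std::size_t... Ns>
struct chunk_tuple<T, N0, Ns...>
{
  T first[N0];                             // one "native" chunk of N0 elements
  chunk_tuple<T, Ns...> second;            // the remaining chunks, recursively

  static constexpr std::size_t size()
  { return N0 + chunk_tuple<T, Ns...>::size(); }
};

static_assert(chunk_tuple<float, 4, 2, 1>::size() == 7);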
299 struct _SimdTuple<_Tp, _Abi0, _Abis...> 300 : _SimdTupleData
::_SimdMember, 301 _SimdTuple<_Tp, _Abis...>> 302 { 303 static_assert(!__is_fixed_size_abi_v<_Abi0>); 304 using value_type = _Tp; 305 using _FirstType = typename _SimdTraits<_Tp, _Abi0>::_SimdMember; 306 using _FirstAbi = _Abi0; 307 using _SecondType = _SimdTuple<_Tp, _Abis...>; 308 static constexpr size_t _S_tuple_size = sizeof...(_Abis) + 1; 309 310 static constexpr size_t _S_size() 311 { return simd_size_v<_Tp, _Abi0> + _SecondType::_S_size(); } 312 313 static constexpr size_t _S_first_size = simd_size_v<_Tp, _Abi0>; 314 static constexpr bool _S_is_homogeneous = (is_same_v<_Abi0, _Abis> && ...); 315 316 using _Base = _SimdTupleData
::_SimdMember, 317 _SimdTuple<_Tp, _Abis...>>; 318 using _Base::first; 319 using _Base::second; 320 321 _GLIBCXX_SIMD_INTRINSIC constexpr _SimdTuple() = default; 322 _GLIBCXX_SIMD_INTRINSIC constexpr _SimdTuple(const _SimdTuple&) = default; 323 _GLIBCXX_SIMD_INTRINSIC constexpr _SimdTuple& operator=(const _SimdTuple&) 324 = default; 325 326 template
327 _GLIBCXX_SIMD_INTRINSIC constexpr 328 _SimdTuple(_Up&& __x) 329 : _Base{static_cast<_Up&&>(__x)} {} 330 331 template
332 _GLIBCXX_SIMD_INTRINSIC constexpr 333 _SimdTuple(_Up&& __x, _Up2&& __y) 334 : _Base{static_cast<_Up&&>(__x), static_cast<_Up2&&>(__y)} {} 335 336 template
337 _GLIBCXX_SIMD_INTRINSIC constexpr 338 _SimdTuple(_Up&& __x, _SimdTuple<_Tp>) 339 : _Base{static_cast<_Up&&>(__x)} {} 340 341 _GLIBCXX_SIMD_INTRINSIC char* 342 _M_as_charptr() 343 { return reinterpret_cast
(this); } 344 345 _GLIBCXX_SIMD_INTRINSIC const char* 346 _M_as_charptr() const 347 { return reinterpret_cast
(this); } 348 349 template
350 _GLIBCXX_SIMD_INTRINSIC constexpr auto& 351 _M_at() 352 { 353 if constexpr (_Np == 0) 354 return first; 355 else 356 return second.template _M_at<_Np - 1>(); 357 } 358 359 template
360 _GLIBCXX_SIMD_INTRINSIC constexpr const auto& 361 _M_at() const 362 { 363 if constexpr (_Np == 0) 364 return first; 365 else 366 return second.template _M_at<_Np - 1>(); 367 } 368 369 template
370 _GLIBCXX_SIMD_INTRINSIC constexpr auto 371 _M_simd_at() const 372 { 373 if constexpr (_Np == 0) 374 return simd<_Tp, _Abi0>(__private_init, first); 375 else 376 return second.template _M_simd_at<_Np - 1>(); 377 } 378 379 template
380 _GLIBCXX_SIMD_INTRINSIC static constexpr _SimdTuple 381 _S_generate(_Fp&& __gen, _SizeConstant<_Offset> = {}) 382 { 383 auto&& __first = __gen(__tuple_element_meta<_Tp, _Abi0, _Offset>()); 384 if constexpr (_S_tuple_size == 1) 385 return {__first}; 386 else 387 return {__first, 388 _SecondType::_S_generate( 389 static_cast<_Fp&&>(__gen), 390 _SizeConstant<_Offset + simd_size_v<_Tp, _Abi0>>())}; 391 } 392 393 template
394 _GLIBCXX_SIMD_INTRINSIC _SimdTuple 395 _M_apply_wrapped(_Fp&& __fun, const _More&... __more) const 396 { 397 auto&& __first 398 = __fun(__make_meta<_Offset>(*this), first, __more.first...); 399 if constexpr (_S_tuple_size == 1) 400 return {__first}; 401 else 402 return { 403 __first, 404 second.template _M_apply_wrapped<_Offset + simd_size_v<_Tp, _Abi0>>( 405 static_cast<_Fp&&>(__fun), __more.second...)}; 406 } 407 408 template
409 _GLIBCXX_SIMD_INTRINSIC constexpr decltype(auto) 410 _M_extract_argument(_Tup&& __tup) const 411 { 412 using _TupT = typename __remove_cvref_t<_Tup>::value_type; 413 if constexpr (is_same_v<_SimdTuple, __remove_cvref_t<_Tup>>) 414 return __tup.first; 415 else if (__builtin_is_constant_evaluated()) 416 return __fixed_size_storage_t<_TupT, _S_first_size>::_S_generate( 417 [&](auto __meta) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA { 418 return __meta._S_generator( 419 [&](auto __i) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA { 420 return __tup[__i]; 421 }, static_cast<_TupT*>(nullptr)); 422 }); 423 else 424 return [&]() { // not always_inline; allow the compiler to decide 425 __fixed_size_storage_t<_TupT, _S_first_size> __r; 426 __builtin_memcpy(__r._M_as_charptr(), __tup._M_as_charptr(), 427 sizeof(__r)); 428 return __r; 429 }(); 430 } 431 432 template
433 _GLIBCXX_SIMD_INTRINSIC constexpr auto& 434 _M_skip_argument(_Tup&& __tup) const 435 { 436 static_assert(_S_tuple_size > 1); 437 using _Up = __remove_cvref_t<_Tup>; 438 constexpr size_t __off = __offset<_Up>; 439 if constexpr (_S_first_size == _Up::_S_first_size && __off == 0) 440 return __tup.second; 441 else if constexpr (_S_first_size > _Up::_S_first_size 442 && _S_first_size % _Up::_S_first_size == 0 443 && __off == 0) 444 return __simd_tuple_pop_front<_S_first_size>(__tup); 445 else if constexpr (_S_first_size + __off < _Up::_S_first_size) 446 return __add_offset<_S_first_size>(__tup); 447 else if constexpr (_S_first_size + __off == _Up::_S_first_size) 448 return __tup.second; 449 else 450 __assert_unreachable<_Tup>(); 451 } 452 453 template
454 _GLIBCXX_SIMD_INTRINSIC constexpr void 455 _M_assign_front(const _SimdTuple<_Tp, _Abi0, _More...>& __x) & 456 { 457 static_assert(_Offset == 0); 458 first = __x.first; 459 if constexpr (sizeof...(_More) > 0) 460 { 461 static_assert(sizeof...(_Abis) >= sizeof...(_More)); 462 second.template _M_assign_front<0>(__x.second); 463 } 464 } 465 466 template
467 _GLIBCXX_SIMD_INTRINSIC constexpr void 468 _M_assign_front(const _FirstType& __x) & 469 { 470 static_assert(_Offset == 0); 471 first = __x; 472 } 473 474 template
475 _GLIBCXX_SIMD_INTRINSIC constexpr void 476 _M_assign_front(const _SimdTuple<_Tp, _As...>& __x) & 477 { 478 __builtin_memcpy(_M_as_charptr() + _Offset * sizeof(value_type), 479 __x._M_as_charptr(), 480 sizeof(_Tp) * _SimdTuple<_Tp, _As...>::_S_size()); 481 } 482 483 /* 484 * Iterate over the first objects in this _SimdTuple and call __fun for each 485 * of them. If additional arguments are passed via __more, chunk them into 486 * _SimdTuple or __vector_type_t objects of the same number of values. 487 */ 488 template
489 _GLIBCXX_SIMD_INTRINSIC constexpr _SimdTuple 490 _M_apply_per_chunk(_Fp&& __fun, _More&&... __more) const 491 { 492 if constexpr ((... 493 || conjunction_v< 494 is_lvalue_reference<_More>, 495 negation
>>>) ) 496 { 497 // need to write back at least one of __more after calling __fun 498 auto&& __first = [&](auto... __args) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA { 499 auto __r = __fun(__tuple_element_meta<_Tp, _Abi0, 0>(), first, 500 __args...); 501 [[maybe_unused]] auto&& __ignore_me = {( 502 [](auto&& __dst, const auto& __src) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA { 503 if constexpr (is_assignable_v
) 505 { 506 __dst.template _M_assign_front<__offset
>( 507 __src); 508 } 509 }(static_cast<_More&&>(__more), __args), 510 0)...}; 511 return __r; 512 }(_M_extract_argument(__more)...); 513 if constexpr (_S_tuple_size == 1) 514 return {__first}; 515 else 516 return {__first, 517 second._M_apply_per_chunk(static_cast<_Fp&&>(__fun), 518 _M_skip_argument(__more)...)}; 519 } 520 else 521 { 522 auto&& __first = __fun(__tuple_element_meta<_Tp, _Abi0, 0>(), first, 523 _M_extract_argument(__more)...); 524 if constexpr (_S_tuple_size == 1) 525 return {__first}; 526 else 527 return {__first, 528 second._M_apply_per_chunk(static_cast<_Fp&&>(__fun), 529 _M_skip_argument(__more)...)}; 530 } 531 } 532 533 template
534 _GLIBCXX_SIMD_INTRINSIC constexpr auto 535 _M_apply_r(_Fp&& __fun, const _More&... __more) const 536 { 537 auto&& __first = __fun(__tuple_element_meta<_Tp, _Abi0, 0>(), first, 538 __more.first...); 539 if constexpr (_S_tuple_size == 1) 540 return __first; 541 else 542 return __simd_tuple_concat<_R>( 543 __first, second.template _M_apply_r<_R>(static_cast<_Fp&&>(__fun), 544 __more.second...)); 545 } 546 547 template
548 _GLIBCXX_SIMD_INTRINSIC constexpr friend _SanitizedBitMask<_S_size()> 549 _M_test(const _Fp& __fun, const _SimdTuple& __x, const _More&... __more) 550 { 551 const _SanitizedBitMask<_S_first_size> __first 552 = _Abi0::_MaskImpl::_S_to_bits( 553 __fun(__tuple_element_meta<_Tp, _Abi0, 0>(), __x.first, 554 __more.first...)); 555 if constexpr (_S_tuple_size == 1) 556 return __first; 557 else 558 return _M_test(__fun, __x.second, __more.second...) 559 ._M_prepend(__first); 560 } 561 562 template
563 _GLIBCXX_SIMD_INTRINSIC constexpr _Tp 564 operator[](integral_constant<_Up, _I>) const noexcept 565 { 566 if constexpr (_I < simd_size_v<_Tp, _Abi0>) 567 return _M_subscript_read(_I); 568 else 569 return second[integral_constant<_Up, _I - simd_size_v<_Tp, _Abi0>>()]; 570 } 571 572 constexpr _Tp 573 operator[](size_t __i) const noexcept 574 { 575 if constexpr (_S_tuple_size == 1) 576 return _M_subscript_read(__i); 577 #ifdef _GLIBCXX_SIMD_USE_ALIASING_LOADS 578 else if (not __builtin_is_constant_evaluated()) 579 return reinterpret_cast
*>(this)[__i]; 580 #endif 581 else if constexpr (__is_scalar_abi<_Abi0>()) 582 { 583 const _Tp* ptr = &first; 584 return ptr[__i]; 585 } 586 else 587 return __i < simd_size_v<_Tp, _Abi0> ? _M_subscript_read(__i) 588 : second[__i - simd_size_v<_Tp, _Abi0>]; 589 } 590 591 constexpr void 592 _M_set(size_t __i, _Tp __val) noexcept 593 { 594 if constexpr (_S_tuple_size == 1) 595 return _M_subscript_write(__i, __val); 596 #ifdef _GLIBCXX_SIMD_USE_ALIASING_LOADS 597 else if (not __builtin_is_constant_evaluated()) 598 reinterpret_cast<__may_alias<_Tp>*>(this)[__i] = __val; 599 #endif 600 else if (__i < simd_size_v<_Tp, _Abi0>) 601 _M_subscript_write(__i, __val); 602 else 603 second._M_set(__i - simd_size_v<_Tp, _Abi0>, __val); 604 } 605 606 private: 607 // _M_subscript_read/_write {{{ 608 constexpr _Tp 609 _M_subscript_read([[maybe_unused]] size_t __i) const noexcept 610 { 611 if constexpr (__is_vectorizable_v<_FirstType>) 612 return first; 613 else 614 return first[__i]; 615 } 616 617 constexpr void 618 _M_subscript_write([[maybe_unused]] size_t __i, _Tp __y) noexcept 619 { 620 if constexpr (__is_vectorizable_v<_FirstType>) 621 first = __y; 622 else 623 first._M_set(__i, __y); 624 } 625 626 // }}} 627 }; 628 629 // __make_simd_tuple {{{1 630 template
631 _GLIBCXX_SIMD_INTRINSIC constexpr _SimdTuple<_Tp, _A0> 632 __make_simd_tuple(simd<_Tp, _A0> __x0) 633 { return {__data(__x0)}; } 634 635 template
636 _GLIBCXX_SIMD_INTRINSIC constexpr _SimdTuple<_Tp, _A0, _As...> 637 __make_simd_tuple(const simd<_Tp, _A0>& __x0, const simd<_Tp, _As>&... __xs) 638 { return {__data(__x0), __make_simd_tuple(__xs...)}; } 639 640 template
641 _GLIBCXX_SIMD_INTRINSIC constexpr _SimdTuple<_Tp, _A0> 642 __make_simd_tuple(const typename _SimdTraits<_Tp, _A0>::_SimdMember& __arg0) 643 { return {__arg0}; } 644 645 template
646 _GLIBCXX_SIMD_INTRINSIC constexpr _SimdTuple<_Tp, _A0, _A1, _Abis...> 647 __make_simd_tuple( 648 const typename _SimdTraits<_Tp, _A0>::_SimdMember& __arg0, 649 const typename _SimdTraits<_Tp, _A1>::_SimdMember& __arg1, 650 const typename _SimdTraits<_Tp, _Abis>::_SimdMember&... __args) 651 { return {__arg0, __make_simd_tuple<_Tp, _A1, _Abis...>(__arg1, __args...)}; } 652 653 // __to_simd_tuple {{{1 654 template
655 _GLIBCXX_SIMD_INTRINSIC constexpr __fixed_size_storage_t<_Tp, _Np> 656 __to_simd_tuple(const array<_V, _NV>& __from, const _VX... __fromX); 657 658 template
, typename _V0, 661 typename _V0VT = _VectorTraits<_V0>, typename... _VX> 662 _GLIBCXX_SIMD_INTRINSIC _R constexpr __to_simd_tuple(const _V0 __from0, const _VX... __fromX) 663 { 664 static_assert(is_same_v
); 665 static_assert(_Offset < _V0VT::_S_full_size); 666 using _R0 = __vector_type_t<_Tp, _R::_S_first_size>; 667 if constexpr (_R::_S_tuple_size == 1) 668 { 669 if constexpr (_Np == 1) 670 return _R{__from0[_Offset]}; 671 else if constexpr (_Offset == 0 && _V0VT::_S_full_size >= _Np) 672 return _R{__intrin_bitcast<_R0>(__from0)}; 673 else if constexpr (_Offset * 2 == _V0VT::_S_full_size 674 && _V0VT::_S_full_size / 2 >= _Np) 675 return _R{__intrin_bitcast<_R0>(__extract_part<1, 2>(__from0))}; 676 else if constexpr (_Offset * 4 == _V0VT::_S_full_size 677 && _V0VT::_S_full_size / 4 >= _Np) 678 return _R{__intrin_bitcast<_R0>(__extract_part<1, 4>(__from0))}; 679 else 680 __assert_unreachable<_Tp>(); 681 } 682 else 683 { 684 if constexpr (1 == _R::_S_first_size) 685 { // extract one scalar and recurse 686 if constexpr (_Offset + 1 < _V0VT::_S_full_size) 687 return _R{__from0[_Offset], 688 __to_simd_tuple<_Tp, _Np - 1, _Offset + 1>(__from0, 689 __fromX...)}; 690 else 691 return _R{__from0[_Offset], 692 __to_simd_tuple<_Tp, _Np - 1, 0>(__fromX...)}; 693 } 694 695 // place __from0 into _R::first and recurse for __fromX -> _R::second 696 else if constexpr (_V0VT::_S_full_size == _R::_S_first_size 697 && _Offset == 0) 698 return _R{__from0, 699 __to_simd_tuple<_Tp, _Np - _R::_S_first_size>(__fromX...)}; 700 701 // place lower part of __from0 into _R::first and recurse with _Offset 702 else if constexpr (_V0VT::_S_full_size > _R::_S_first_size 703 && _Offset == 0) 704 return _R{__intrin_bitcast<_R0>(__from0), 705 __to_simd_tuple<_Tp, _Np - _R::_S_first_size, 706 _R::_S_first_size>(__from0, __fromX...)}; 707 708 // place lower part of second quarter of __from0 into _R::first and 709 // recurse with _Offset 710 else if constexpr (_Offset * 4 == _V0VT::_S_full_size 711 && _V0VT::_S_full_size >= 4 * _R::_S_first_size) 712 return _R{__intrin_bitcast<_R0>(__extract_part<2, 4>(__from0)), 713 __to_simd_tuple<_Tp, _Np - _R::_S_first_size, 714 _Offset + _R::_S_first_size>(__from0, 715 __fromX...)}; 716 717 // place lower half of high half of __from0 into _R::first and recurse 718 // with _Offset 719 else if constexpr (_Offset * 2 == _V0VT::_S_full_size 720 && _V0VT::_S_full_size >= 4 * _R::_S_first_size) 721 return _R{__intrin_bitcast<_R0>(__extract_part<2, 4>(__from0)), 722 __to_simd_tuple<_Tp, _Np - _R::_S_first_size, 723 _Offset + _R::_S_first_size>(__from0, 724 __fromX...)}; 725 726 // place high half of __from0 into _R::first and recurse with __fromX 727 else if constexpr (_Offset * 2 == _V0VT::_S_full_size 728 && _V0VT::_S_full_size / 2 >= _R::_S_first_size) 729 return _R{__intrin_bitcast<_R0>(__extract_part<1, 2>(__from0)), 730 __to_simd_tuple<_Tp, _Np - _R::_S_first_size, 0>( 731 __fromX...)}; 732 733 // ill-formed if some unforseen pattern is needed 734 else 735 __assert_unreachable<_Tp>(); 736 } 737 } 738 739 template
740 _GLIBCXX_SIMD_INTRINSIC constexpr __fixed_size_storage_t<_Tp, _Np> 741 __to_simd_tuple(const array<_V, _NV>& __from, const _VX... __fromX) 742 { 743 if constexpr (is_same_v<_Tp, _V>) 744 { 745 static_assert( 746 sizeof...(_VX) == 0, 747 "An array of scalars must be the last argument to __to_simd_tuple"); 748 return __call_with_subscripts( 749 __from, make_index_sequence<_NV>(), 750 [&](const auto... __args) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA { 751 return __simd_tuple_concat( 752 _SimdTuple<_Tp, simd_abi::scalar>{__args}..., _SimdTuple<_Tp>()); 753 }); 754 } 755 else 756 return __call_with_subscripts( 757 __from, make_index_sequence<_NV>(), 758 [&](const auto... __args) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA { 759 return __to_simd_tuple<_Tp, _Np>(__args..., __fromX...); 760 }); 761 } 762 763 template
764 using __to_tuple_helper = _Tp; 765 766 template
768 _GLIBCXX_SIMD_INTRINSIC __fixed_size_storage_t<_Tp, _NOut> 769 __to_simd_tuple_impl(index_sequence<_Indexes...>, 770 const array<__vector_type_t<_Tp, simd_size_v<_Tp, _A0>>, _Np>& __args) 771 { 772 return __make_simd_tuple<_Tp, __to_tuple_helper<_Indexes, _A0>...>( 773 __args[_Indexes]...); 774 } 775 776 template
> 778 _GLIBCXX_SIMD_INTRINSIC _R 779 __to_simd_tuple_sized( 780 const array<__vector_type_t<_Tp, simd_size_v<_Tp, _A0>>, _Np>& __args) 781 { 782 static_assert(_Np * simd_size_v<_Tp, _A0> >= _NOut); 783 return __to_simd_tuple_impl<_Tp, _A0, _NOut>( 784 make_index_sequence<_R::_S_tuple_size>(), __args); 785 } 786 787 // __optimize_simd_tuple {{{1 788 template
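__to_simd_tuple_impl above uses the usual index_sequence trick to turn the elements of an array into a parameter pack. The same technique in a stand-alone sketch (sum and sum_impl are hypothetical names, not part of the header):

#include <array>
#include <cstddef>
#include <utility>

template <typename T, std::size_t N, std::size_t... Is>
constexpr T sum_impl(const std::array<T, N>& a, std::index_sequence<Is...>)
{ return (T{} + ... + a[Is]); }            // expands to a[0] + a[1] + ... + a[N-1]

template <typename T, std::size_t N>
constexpr T sum(const std::array<T, N>& a)
{ return sum_impl(a, std::make_index_sequence<N>()); }

static_assert(sum(std::array<int, 4>{1, 2, 3, 4}) == 10);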
789 _GLIBCXX_SIMD_INTRINSIC constexpr _SimdTuple<_Tp> 790 __optimize_simd_tuple(const _SimdTuple<_Tp>) 791 { return {}; } 792 793 template
794 _GLIBCXX_SIMD_INTRINSIC constexpr const _SimdTuple<_Tp, _Ap>& 795 __optimize_simd_tuple(const _SimdTuple<_Tp, _Ap>& __x) 796 { return __x; } 797 798 template
::_S_size()>> 801 _GLIBCXX_SIMD_INTRINSIC constexpr _R 802 __optimize_simd_tuple(const _SimdTuple<_Tp, _A0, _A1, _Abis...>& __x) 803 { 804 using _Tup = _SimdTuple<_Tp, _A0, _A1, _Abis...>; 805 if constexpr (is_same_v<_R, _Tup>) 806 return __x; 807 else if constexpr (is_same_v
) 809 return {__x.first, __optimize_simd_tuple(__x.second)}; 810 else if constexpr (__is_scalar_abi<_A0>() 811 || _A0::template _S_is_partial<_Tp>) 812 return {__generate_from_n_evaluations<_R::_S_first_size, 813 typename _R::_FirstType>( 814 [&](auto __i) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA { return __x[__i]; }), 815 __optimize_simd_tuple( 816 __simd_tuple_pop_front<_R::_S_first_size>(__x))}; 817 else if constexpr (is_same_v<_A0, _A1> 818 && _R::_S_first_size == simd_size_v<_Tp, _A0> + simd_size_v<_Tp, _A1>) 819 return {__concat(__x.template _M_at<0>(), __x.template _M_at<1>()), 820 __optimize_simd_tuple(__x.second.second)}; 821 else if constexpr (sizeof...(_Abis) >= 2 822 && _R::_S_first_size == (4 * simd_size_v<_Tp, _A0>) 823 && simd_size_v<_Tp, _A0> == __simd_tuple_element_t< 824 (sizeof...(_Abis) >= 2 ? 3 : 0), _Tup>::size()) 825 return { 826 __concat(__concat(__x.template _M_at<0>(), __x.template _M_at<1>()), 827 __concat(__x.template _M_at<2>(), __x.template _M_at<3>())), 828 __optimize_simd_tuple(__x.second.second.second.second)}; 829 else 830 { 831 static_assert(sizeof(_R) == sizeof(__x)); 832 _R __r; 833 __builtin_memcpy(__r._M_as_charptr(), __x._M_as_charptr(), 834 sizeof(_Tp) * _R::_S_size()); 835 return __r; 836 } 837 } 838 839 // __for_each(const _SimdTuple &, Fun) {{{1 840 template
841 _GLIBCXX_SIMD_INTRINSIC constexpr void 842 __for_each(const _SimdTuple<_Tp, _A0>& __t, _Fp&& __fun) 843 { static_cast<_Fp&&>(__fun)(__make_meta<_Offset>(__t), __t.first); } 844 845 template
847 _GLIBCXX_SIMD_INTRINSIC constexpr void 848 __for_each(const _SimdTuple<_Tp, _A0, _A1, _As...>& __t, _Fp&& __fun) 849 { 850 __fun(__make_meta<_Offset>(__t), __t.first); 851 __for_each<_Offset + simd_size<_Tp, _A0>::value>(__t.second, 852 static_cast<_Fp&&>(__fun)); 853 } 854 855 // __for_each(_SimdTuple &, Fun) {{{1 856 template
857 _GLIBCXX_SIMD_INTRINSIC constexpr void 858 __for_each(_SimdTuple<_Tp, _A0>& __t, _Fp&& __fun) 859 { static_cast<_Fp&&>(__fun)(__make_meta<_Offset>(__t), __t.first); } 860 861 template
863 _GLIBCXX_SIMD_INTRINSIC constexpr void 864 __for_each(_SimdTuple<_Tp, _A0, _A1, _As...>& __t, _Fp&& __fun) 865 { 866 __fun(__make_meta<_Offset>(__t), __t.first); 867 __for_each<_Offset + simd_size<_Tp, _A0>::value>(__t.second, 868 static_cast<_Fp&&>(__fun)); 869 } 870 871 // __for_each(_SimdTuple &, const _SimdTuple &, Fun) {{{1 872 template
873 _GLIBCXX_SIMD_INTRINSIC constexpr void 874 __for_each(_SimdTuple<_Tp, _A0>& __a, const _SimdTuple<_Tp, _A0>& __b, _Fp&& __fun) 875 { static_cast<_Fp&&>(__fun)(__make_meta<_Offset>(__a), __a.first, __b.first); } 876 877 template
879 _GLIBCXX_SIMD_INTRINSIC constexpr void 880 __for_each(_SimdTuple<_Tp, _A0, _A1, _As...>& __a, 881 const _SimdTuple<_Tp, _A0, _A1, _As...>& __b, _Fp&& __fun) 882 { 883 __fun(__make_meta<_Offset>(__a), __a.first, __b.first); 884 __for_each<_Offset + simd_size<_Tp, _A0>::value>(__a.second, __b.second, 885 static_cast<_Fp&&>(__fun)); 886 } 887 888 // __for_each(const _SimdTuple &, const _SimdTuple &, Fun) {{{1 889 template
890 _GLIBCXX_SIMD_INTRINSIC constexpr void 891 __for_each(const _SimdTuple<_Tp, _A0>& __a, const _SimdTuple<_Tp, _A0>& __b, _Fp&& __fun) 892 { static_cast<_Fp&&>(__fun)(__make_meta<_Offset>(__a), __a.first, __b.first); } 893 894 template
896 _GLIBCXX_SIMD_INTRINSIC constexpr void 897 __for_each(const _SimdTuple<_Tp, _A0, _A1, _As...>& __a, 898 const _SimdTuple<_Tp, _A0, _A1, _As...>& __b, _Fp&& __fun) 899 { 900 __fun(__make_meta<_Offset>(__a), __a.first, __b.first); 901 __for_each<_Offset + simd_size<_Tp, _A0>::value>(__a.second, __b.second, 902 static_cast<_Fp&&>(__fun)); 903 } 904 905 // }}}1 906 // __extract_part(_SimdTuple) {{{ 907 template
908 _GLIBCXX_SIMD_INTRINSIC constexpr auto // __vector_type_t or _SimdTuple 909 __extract_part(const _SimdTuple<_Tp, _A0, _As...>& __x) 910 { 911 // worst cases: 912 // (a) 4, 4, 4 => 3, 3, 3, 3 (_Total = 4) 913 // (b) 2, 2, 2 => 3, 3 (_Total = 2) 914 // (c) 4, 2 => 2, 2, 2 (_Total = 3) 915 using _Tuple = _SimdTuple<_Tp, _A0, _As...>; 916 static_assert(_Index + _Combine <= _Total && _Index >= 0 && _Total >= 1); 917 constexpr size_t _Np = _Tuple::_S_size(); 918 static_assert(_Np >= _Total && _Np % _Total == 0); 919 constexpr size_t __values_per_part = _Np / _Total; 920 [[maybe_unused]] constexpr size_t __values_to_skip 921 = _Index * __values_per_part; 922 constexpr size_t __return_size = __values_per_part * _Combine; 923 using _RetAbi = simd_abi::deduce_t<_Tp, __return_size>; 924 925 // handle (optimize) the simple cases 926 if constexpr (_Index == 0 && _Tuple::_S_first_size == __return_size) 927 return __x.first._M_data; 928 else if constexpr (_Index == 0 && _Total == _Combine) 929 return __x; 930 else if constexpr (_Index == 0 && _Tuple::_S_first_size >= __return_size) 931 return __intrin_bitcast<__vector_type_t<_Tp, __return_size>>( 932 __as_vector(__x.first)); 933 934 // recurse to skip unused data members at the beginning of _SimdTuple 935 else if constexpr (__values_to_skip >= _Tuple::_S_first_size) 936 { // recurse 937 if constexpr (_Tuple::_S_first_size % __values_per_part == 0) 938 { 939 constexpr int __parts_in_first 940 = _Tuple::_S_first_size / __values_per_part; 941 return __extract_part<_Index - __parts_in_first, 942 _Total - __parts_in_first, _Combine>( 943 __x.second); 944 } 945 else 946 return __extract_part<__values_to_skip - _Tuple::_S_first_size, 947 _Np - _Tuple::_S_first_size, __return_size>( 948 __x.second); 949 } 950 951 // extract from multiple _SimdTuple data members 952 else if constexpr (__return_size > _Tuple::_S_first_size - __values_to_skip) 953 { 954 #ifdef _GLIBCXX_SIMD_USE_ALIASING_LOADS 955 const __may_alias<_Tp>* const element_ptr 956 = reinterpret_cast
*>(&__x) + __values_to_skip; 957 return __as_vector(simd<_Tp, _RetAbi>(element_ptr, element_aligned)); 958 #else 959 [[maybe_unused]] constexpr size_t __offset = __values_to_skip; 960 return __as_vector(simd<_Tp, _RetAbi>( 961 [&](auto __i) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA { 962 constexpr _SizeConstant<__i + __offset> __k; 963 return __x[__k]; 964 })); 965 #endif 966 } 967 968 // all of the return values are in __x.first 969 else if constexpr (_Tuple::_S_first_size % __values_per_part == 0) 970 return __extract_part<_Index, _Tuple::_S_first_size / __values_per_part, 971 _Combine>(__x.first); 972 else 973 return __extract_part<__values_to_skip, _Tuple::_S_first_size, 974 _Combine * __values_per_part>(__x.first); 975 } 976 977 // }}} 978 // __fixed_size_storage_t<_Tp, _Np>{{{ 979 template
>, 981 int _Remain = _Np - int(_Next::size())> 982 struct __fixed_size_storage_builder; 983 984 template
985 struct __fixed_size_storage 986 : public __fixed_size_storage_builder<_Tp, _Np, _SimdTuple<_Tp>> {}; 987 988 template
989 struct __fixed_size_storage_builder<_Tp, _Np, _SimdTuple<_Tp, _As...>, _Next, 990 0> 991 { using type = _SimdTuple<_Tp, _As..., typename _Next::abi_type>; }; 992 993 template
994 struct __fixed_size_storage_builder<_Tp, _Np, _SimdTuple<_Tp, _As...>, _Next, 995 _Remain> 996 { 997 using type = typename __fixed_size_storage_builder< 998 _Tp, _Remain, _SimdTuple<_Tp, _As..., typename _Next::abi_type>>::type; 999 }; 1000 1001 // }}} 1002 // _AbisInSimdTuple {{{ 1003 template
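__fixed_size_storage_builder above appends one native chunk per step and recurses on the remaining element count until _Remain reaches 0. The run-time sketch below only illustrates the effect of that greedy recursion; the chunk widths (8, 4, 2, 1) are made-up examples, not the actual ABI selection logic:

#include <cstdio>

void print_chunks(int n)
{
  constexpr int widths[] = {8, 4, 2, 1};   // hypothetical native chunk sizes
  while (n > 0)
    for (int w : widths)
      if (w <= n)
        {
          std::printf("%d ", w);           // e.g. n == 11 prints "8 2 1"
          n -= w;
          break;
        }
  std::printf("\n");
}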
1004 struct _SeqOp; 1005 1006 template
1007 struct _SeqOp
> 1008 { 1009 using _FirstPlusOne = index_sequence<_I0 + 1, _Is...>; 1010 using _NotFirstPlusOne = index_sequence<_I0, (_Is + 1)...>; 1011 template
1012 using _Prepend = index_sequence<_First, _I0 + _Add, (_Is + _Add)...>; 1013 }; 1014 1015 template
1016 struct _AbisInSimdTuple; 1017 1018 template
1019 struct _AbisInSimdTuple<_SimdTuple<_Tp>> 1020 { 1021 using _Counts = index_sequence<0>; 1022 using _Begins = index_sequence<0>; 1023 }; 1024 1025 template
1026 struct _AbisInSimdTuple<_SimdTuple<_Tp, _Ap>> 1027 { 1028 using _Counts = index_sequence<1>; 1029 using _Begins = index_sequence<0>; 1030 }; 1031 1032 template
1033 struct _AbisInSimdTuple<_SimdTuple<_Tp, _A0, _A0, _As...>> 1034 { 1035 using _Counts = typename _SeqOp
>::_Counts>::_FirstPlusOne; 1037 using _Begins = typename _SeqOp
>::_Begins>::_NotFirstPlusOne; 1039 }; 1040 1041 template
1042 struct _AbisInSimdTuple<_SimdTuple<_Tp, _A0, _A1, _As...>> 1043 { 1044 using _Counts = typename _SeqOp
>::_Counts>::template _Prepend<1, 0>; 1046 using _Begins = typename _SeqOp
>::_Begins>::template _Prepend<0, 1>; 1048 }; 1049 1050 // }}} 1051 // __autocvt_to_simd {{{ 1052 template
>> 1053 struct __autocvt_to_simd 1054 { 1055 _Tp _M_data; 1056 using _TT = __remove_cvref_t<_Tp>; 1057 1058 constexpr 1059 operator _TT() 1060 { return _M_data; } 1061 1062 constexpr 1063 operator _TT&() 1064 { 1065 static_assert(is_lvalue_reference<_Tp>::value, ""); 1066 static_assert(!is_const<_Tp>::value, ""); 1067 return _M_data; 1068 } 1069 1070 constexpr 1071 operator _TT*() 1072 { 1073 static_assert(is_lvalue_reference<_Tp>::value, ""); 1074 static_assert(!is_const<_Tp>::value, ""); 1075 return &_M_data; 1076 } 1077 1078 constexpr inline 1079 __autocvt_to_simd(_Tp dd) : _M_data(dd) {} 1080 1081 template
1082 constexpr 1083 operator simd
() 1084 { return {__private_init, _M_data}; } 1085 1086 template
1087 constexpr 1088 operator simd
&() 1089 { return *reinterpret_cast
*>(&_M_data); } 1090 1091 template
1092 constexpr 1093 operator simd
*() 1094 { return reinterpret_cast
*>(&_M_data); } 1095 }; 1096 1097 template
1098 __autocvt_to_simd(_Tp &&) -> __autocvt_to_simd<_Tp>; 1099 1100 template
1101 struct __autocvt_to_simd<_Tp, true> 1102 { 1103 using _TT = __remove_cvref_t<_Tp>; 1104 _Tp _M_data; 1105 fixed_size_simd<_TT, 1> _M_fd; 1106 1107 constexpr inline __autocvt_to_simd(_Tp dd) : _M_data(dd), _M_fd(_M_data) {} 1108 1109 ~__autocvt_to_simd() 1110 { _M_data = __data(_M_fd).first; } 1111 1112 constexpr 1113 operator fixed_size_simd<_TT, 1>() 1114 { return _M_fd; } 1115 1116 constexpr 1117 operator fixed_size_simd<_TT, 1> &() 1118 { 1119 static_assert(is_lvalue_reference<_Tp>::value, ""); 1120 static_assert(!is_const<_Tp>::value, ""); 1121 return _M_fd; 1122 } 1123 1124 constexpr 1125 operator fixed_size_simd<_TT, 1> *() 1126 { 1127 static_assert(is_lvalue_reference<_Tp>::value, ""); 1128 static_assert(!is_const<_Tp>::value, ""); 1129 return &_M_fd; 1130 } 1131 }; 1132 1133 // }}} 1134 1135 struct _CommonImplFixedSize; 1136 template
struct _SimdImplFixedSize; 1137 template
struct _MaskImplFixedSize; 1138 // simd_abi::_Fixed {{{ 1139 template
1140 struct simd_abi::_Fixed 1141 { 1142 template
static constexpr size_t _S_size = _Np; 1143 template
static constexpr size_t _S_full_size = _Np; 1144 // validity traits {{{ 1145 struct _IsValidAbiTag : public __bool_constant<(_Np > 0)> {}; 1146 1147 template
1148 struct _IsValidSizeFor 1149 : __bool_constant<(_Np <= simd_abi::max_fixed_size<_Tp>)> {}; 1150 1151 template
1152 struct _IsValid : conjunction<_IsValidAbiTag, __is_vectorizable<_Tp>, 1153 _IsValidSizeFor<_Tp>> {}; 1154 1155 template
1156 static constexpr bool _S_is_valid_v = _IsValid<_Tp>::value; 1157 1158 // }}} 1159 // _S_masked {{{ 1160 _GLIBCXX_SIMD_INTRINSIC static constexpr _SanitizedBitMask<_Np> 1161 _S_masked(_BitMask<_Np> __x) 1162 { return __x._M_sanitized(); } 1163 1164 _GLIBCXX_SIMD_INTRINSIC static constexpr _SanitizedBitMask<_Np> 1165 _S_masked(_SanitizedBitMask<_Np> __x) 1166 { return __x; } 1167 1168 // }}} 1169 // _*Impl {{{ 1170 using _CommonImpl = _CommonImplFixedSize; 1171 using _SimdImpl = _SimdImplFixedSize<_Np>; 1172 using _MaskImpl = _MaskImplFixedSize<_Np>; 1173 1174 // }}} 1175 // __traits {{{ 1176 template
> 1177 struct __traits : _InvalidTraits {}; 1178 1179 template
1180 struct __traits<_Tp, true> 1181 { 1182 using _IsValid = true_type; 1183 using _SimdImpl = _SimdImplFixedSize<_Np>; 1184 using _MaskImpl = _MaskImplFixedSize<_Np>; 1185 1186 // simd and simd_mask member types {{{ 1187 using _SimdMember = __fixed_size_storage_t<_Tp, _Np>; 1188 using _MaskMember = _SanitizedBitMask<_Np>; 1189 1190 static constexpr size_t _S_simd_align 1191 = std::__bit_ceil(_Np * sizeof(_Tp)); 1192 1193 static constexpr size_t _S_mask_align = alignof(_MaskMember); 1194 1195 // }}} 1196 // _SimdBase / base class for simd, providing extra conversions {{{ 1197 struct _SimdBase 1198 { 1199 // The following ensures, function arguments are passed via the stack. 1200 // This is important for ABI compatibility across TU boundaries 1201 constexpr 1202 _SimdBase(const _SimdBase&) {} 1203 1204 _SimdBase() = default; 1205 1206 constexpr explicit 1207 operator const _SimdMember &() const 1208 { return static_cast
*>(this)->_M_data; } 1209 1210 constexpr explicit 1211 operator array<_Tp, _Np>() const 1212 { 1213 array<_Tp, _Np> __r; 1214 // _SimdMember can be larger because of higher alignment 1215 static_assert(sizeof(__r) <= sizeof(_SimdMember), ""); 1216 __builtin_memcpy(__r.data(), &static_cast
(*this), 1217 sizeof(__r)); 1218 return __r; 1219 } 1220 }; 1221 1222 // }}} 1223 // _MaskBase {{{ 1224 // empty. The bitset interface suffices 1225 struct _MaskBase {}; 1226 1227 // }}} 1228 // _SimdCastType {{{ 1229 struct _SimdCastType 1230 { 1231 constexpr 1232 _SimdCastType(const array<_Tp, _Np>&); 1233 1234 constexpr 1235 _SimdCastType(const _SimdMember& dd) : _M_data(dd) {} 1236 1237 constexpr explicit 1238 operator const _SimdMember &() const { return _M_data; } 1239 1240 private: 1241 const _SimdMember& _M_data; 1242 }; 1243 1244 // }}} 1245 // _MaskCastType {{{ 1246 class _MaskCastType 1247 { 1248 _MaskCastType() = delete; 1249 }; 1250 // }}} 1251 }; 1252 // }}} 1253 }; 1254 1255 // }}} 1256 // _CommonImplFixedSize {{{ 1257 struct _CommonImplFixedSize 1258 { 1259 // _S_store {{{ 1260 template
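The _S_store that follows is a single __builtin_memcpy of _Np * sizeof(_Tp) bytes, which works because the tuple's memory layout matches array<_Tp, _Np> (see the ABI comment at the top of the file). From the public API, a plain store of a fixed_size simd looks like this (a usage sketch; which internal store function a given call reaches depends on the call site):

#include <experimental/simd>
#include <array>

namespace stdx = std::experimental;

void store_example(std::array<int, 5>& out)
{
  stdx::fixed_size_simd<int, 5> v(7);              // broadcast 7 to all 5 elements
  v.copy_to(out.data(), stdx::element_aligned);    // contiguous, byte-wise store
}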
1261 _GLIBCXX_SIMD_INTRINSIC static void 1262 _S_store(const _SimdTuple<_Tp, _As...>& __x, void* __addr) 1263 { 1264 constexpr size_t _Np = _SimdTuple<_Tp, _As...>::_S_size(); 1265 __builtin_memcpy(__addr, &__x, _Np * sizeof(_Tp)); 1266 } 1267 1268 // }}} 1269 }; 1270 1271 // }}} 1272 // _SimdImplFixedSize {{{1 1273 // fixed_size should not inherit from _SimdMathFallback in order for 1274 // specializations in the used _SimdTuple Abis to get used 1275 template
1276 struct _SimdImplFixedSize 1277 { 1278 // member types {{{2 1279 using _MaskMember = _SanitizedBitMask<_Np>; 1280 1281 template
1282 using _SimdMember = __fixed_size_storage_t<_Tp, _Np>; 1283 1284 template
1285 static constexpr size_t _S_tuple_size = _SimdMember<_Tp>::_S_tuple_size; 1286 1287 template
1288 using _Simd = simd<_Tp, simd_abi::fixed_size<_Np>>; 1289 1290 template
1291 using _TypeTag = _Tp*; 1292 1293 // broadcast {{{2 1294 template
1295 _GLIBCXX_SIMD_INTRINSIC static constexpr _SimdMember<_Tp> 1296 _S_broadcast(_Tp __x) noexcept 1297 { 1298 return _SimdMember<_Tp>::_S_generate( 1299 [&](auto __meta) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA { 1300 return __meta._S_broadcast(__x); 1301 }); 1302 } 1303 1304 // _S_generator {{{2 1305 template
1306 _GLIBCXX_SIMD_INTRINSIC static constexpr _SimdMember<_Tp> 1307 _S_generator(_Fp&& __gen, _TypeTag<_Tp>) 1308 { 1309 return _SimdMember<_Tp>::_S_generate( 1310 [&__gen](auto __meta) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA { 1311 return __meta._S_generator( 1312 [&](auto __i) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA { 1313 return __i < _Np ? __gen(_SizeConstant<__meta._S_offset + __i>()) 1314 : 0; 1315 }, 1316 _TypeTag<_Tp>()); 1317 }); 1318 } 1319 1320 // _S_load {{{2 1321 template
1322 _GLIBCXX_SIMD_INTRINSIC static constexpr _SimdMember<_Tp> 1323 _S_load(const _Up* __mem, _TypeTag<_Tp>) noexcept 1324 { 1325 return _SimdMember<_Tp>::_S_generate( 1326 [&](auto __meta) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA { 1327 return __meta._S_load(&__mem[__meta._S_offset], _TypeTag<_Tp>()); 1328 }); 1329 } 1330 1331 // _S_masked_load {{{2 1332 template
1333 _GLIBCXX_SIMD_INTRINSIC static _SimdTuple<_Tp, _As...> 1334 _S_masked_load(const _SimdTuple<_Tp, _As...>& __old, 1335 const _MaskMember __bits, const _Up* __mem) noexcept 1336 { 1337 auto __merge = __old; 1338 __for_each(__merge, [&](auto __meta, auto& __native) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA { 1339 if (__meta._S_submask(__bits).any()) 1340 #pragma GCC diagnostic push 1341 // Dereferencing __mem + __meta._S_offset could be UB ([expr.add]/4.3). 1342 // It is the responsibility of the caller of the masked load (via the mask's value) to 1343 // avoid UB. Consequently, the compiler may assume this branch is unreachable, if the 1344 // pointer arithmetic is UB. 1345 #pragma GCC diagnostic ignored "-Warray-bounds" 1346 __native 1347 = __meta._S_masked_load(__native, __meta._S_make_mask(__bits), 1348 __mem + __meta._S_offset); 1349 #pragma GCC diagnostic pop 1350 }); 1351 return __merge; 1352 } 1353 1354 // _S_store {{{2 1355 template
1356 _GLIBCXX_SIMD_INTRINSIC static constexpr void 1357 _S_store(const _SimdMember<_Tp>& __v, _Up* __mem, _TypeTag<_Tp>) noexcept 1358 { 1359 __for_each(__v, [&](auto __meta, auto __native) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA { 1360 __meta._S_store(__native, &__mem[__meta._S_offset], _TypeTag<_Tp>()); 1361 }); 1362 } 1363 1364 // _S_masked_store {{{2 1365 template
1366 _GLIBCXX_SIMD_INTRINSIC static void 1367 _S_masked_store(const _SimdTuple<_Tp, _As...>& __v, _Up* __mem, 1368 const _MaskMember __bits) noexcept 1369 { 1370 __for_each(__v, [&](auto __meta, auto __native) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA { 1371 if (__meta._S_submask(__bits).any()) 1372 #pragma GCC diagnostic push 1373 // __mem + __mem._S_offset could be UB ([expr.add]/4.3, but it punts 1374 // the responsibility for avoiding UB to the caller of the masked 1375 // store via the mask. Consequently, the compiler may assume this 1376 // branch is unreachable, if the pointer arithmetic is UB. 1377 #pragma GCC diagnostic ignored "-Warray-bounds" 1378 __meta._S_masked_store(__native, __mem + __meta._S_offset, 1379 __meta._S_make_mask(__bits)); 1380 #pragma GCC diagnostic pop 1381 }); 1382 } 1383 1384 // negation {{{2 1385 template
1386 static constexpr inline _MaskMember 1387 _S_negate(const _SimdTuple<_Tp, _As...>& __x) noexcept 1388 { 1389 _MaskMember __bits = 0; 1390 __for_each( 1391 __x, [&__bits](auto __meta, auto __native) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA { 1392 __bits 1393 |= __meta._S_mask_to_shifted_ullong(__meta._S_negate(__native)); 1394 }); 1395 return __bits; 1396 } 1397 1398 // reductions {{{2 1399 template
1400 static constexpr inline _Tp _S_reduce(const _Simd<_Tp>& __x, 1401 const _BinaryOperation& __binary_op) 1402 { 1403 using _Tup = _SimdMember<_Tp>; 1404 const _Tup& __tup = __data(__x); 1405 if constexpr (_Tup::_S_tuple_size == 1) 1406 return _Tup::_FirstAbi::_SimdImpl::_S_reduce( 1407 __tup.template _M_simd_at<0>(), __binary_op); 1408 else if constexpr (_Tup::_S_tuple_size == 2 && _Tup::_S_size() > 2 1409 && _Tup::_SecondType::_S_size() == 1) 1410 { 1411 return __binary_op(simd<_Tp, simd_abi::scalar>( 1412 reduce(__tup.template _M_simd_at<0>(), 1413 __binary_op)), 1414 __tup.template _M_simd_at<1>())[0]; 1415 } 1416 else if constexpr (_Tup::_S_tuple_size == 2 && _Tup::_S_size() > 4 1417 && _Tup::_SecondType::_S_size() == 2) 1418 { 1419 return __binary_op( 1420 simd<_Tp, simd_abi::scalar>( 1421 reduce(__tup.template _M_simd_at<0>(), __binary_op)), 1422 simd<_Tp, simd_abi::scalar>( 1423 reduce(__tup.template _M_simd_at<1>(), __binary_op)))[0]; 1424 } 1425 else 1426 { 1427 const auto& __x2 = __call_with_n_evaluations< 1428 __div_roundup(_Tup::_S_tuple_size, 2)>( 1429 [](auto __first_simd, auto... __remaining) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA { 1430 if constexpr (sizeof...(__remaining) == 0) 1431 return __first_simd; 1432 else 1433 { 1434 using _Tup2 1435 = _SimdTuple<_Tp, 1436 typename decltype(__first_simd)::abi_type, 1437 typename decltype(__remaining)::abi_type...>; 1438 return fixed_size_simd<_Tp, _Tup2::_S_size()>( 1439 __private_init, 1440 __make_simd_tuple(__first_simd, __remaining...)); 1441 } 1442 }, 1443 [&](auto __i) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA { 1444 auto __left = __tup.template _M_simd_at<2 * __i>(); 1445 if constexpr (2 * __i + 1 == _Tup::_S_tuple_size) 1446 return __left; 1447 else 1448 { 1449 auto __right = __tup.template _M_simd_at<2 * __i + 1>(); 1450 using _LT = decltype(__left); 1451 using _RT = decltype(__right); 1452 if constexpr (_LT::size() == _RT::size()) 1453 return __binary_op(__left, __right); 1454 else 1455 { 1456 _GLIBCXX_SIMD_USE_CONSTEXPR_API 1457 typename _LT::mask_type __k( 1458 __private_init, 1459 [](auto __j) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA { 1460 return __j < _RT::size(); 1461 }); 1462 _LT __ext_right = __left; 1463 where(__k, __ext_right) 1464 = __proposed::resizing_simd_cast<_LT>(__right); 1465 where(__k, __left) = __binary_op(__left, __ext_right); 1466 return __left; 1467 } 1468 } 1469 }); 1470 return reduce(__x2, __binary_op); 1471 } 1472 } 1473 1474 // _S_min, _S_max {{{2 1475 template
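The general branch of _S_reduce above halves the number of chunks per round, carrying an odd leftover chunk into the next round. The stand-alone sketch below shows only that pairwise-combining skeleton (tree_reduce is a hypothetical name) and leaves out the simd resizing and masking details the real code needs for unequal chunk sizes:

#include <cassert>
#include <cstddef>
#include <functional>
#include <utility>
#include <vector>

template <typename T, typename BinaryOp = std::plus<T>>
T tree_reduce(std::vector<T> parts, BinaryOp op = {})
{
  assert(!parts.empty());
  while (parts.size() > 1)
    {
      std::vector<T> next;
      for (std::size_t i = 0; i + 1 < parts.size(); i += 2)
        next.push_back(op(parts[i], parts[i + 1]));   // combine pairs
      if (parts.size() % 2 != 0)
        next.push_back(parts.back());                 // odd chunk carried over
      parts = std::move(next);
    }
  return parts.front();
}
// tree_reduce<int>({1, 2, 3, 4, 5}) == 15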
1476 _GLIBCXX_SIMD_INTRINSIC static constexpr _SimdTuple<_Tp, _As...> 1477 _S_min(const _SimdTuple<_Tp, _As...>& __a, const _SimdTuple<_Tp, _As...>& __b) 1478 { 1479 return __a._M_apply_per_chunk( 1480 [](auto __impl, auto __aa, auto __bb) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA { 1481 return __impl._S_min(__aa, __bb); 1482 }, 1483 __b); 1484 } 1485 1486 template
1487 _GLIBCXX_SIMD_INTRINSIC static constexpr _SimdTuple<_Tp, _As...> 1488 _S_max(const _SimdTuple<_Tp, _As...>& __a, const _SimdTuple<_Tp, _As...>& __b) 1489 { 1490 return __a._M_apply_per_chunk( 1491 [](auto __impl, auto __aa, auto __bb) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA { 1492 return __impl._S_max(__aa, __bb); 1493 }, 1494 __b); 1495 } 1496 1497 // _S_complement {{{2 1498 template
1499 _GLIBCXX_SIMD_INTRINSIC static constexpr _SimdTuple<_Tp, _As...> 1500 _S_complement(const _SimdTuple<_Tp, _As...>& __x) noexcept 1501 { 1502 return __x._M_apply_per_chunk( 1503 [](auto __impl, auto __xx) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA { 1504 return __impl._S_complement(__xx); 1505 }); 1506 } 1507 1508 // _S_unary_minus {{{2 1509 template
1510 _GLIBCXX_SIMD_INTRINSIC static constexpr _SimdTuple<_Tp, _As...> 1511 _S_unary_minus(const _SimdTuple<_Tp, _As...>& __x) noexcept 1512 { 1513 return __x._M_apply_per_chunk( 1514 [](auto __impl, auto __xx) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA { 1515 return __impl._S_unary_minus(__xx); 1516 }); 1517 } 1518 1519 // arithmetic operators {{{2 1520 1521 #define _GLIBCXX_SIMD_FIXED_OP(name_, op_) \ 1522 template
\ 1523 _GLIBCXX_SIMD_INTRINSIC static constexpr _SimdTuple<_Tp, _As...> name_( \ 1524 const _SimdTuple<_Tp, _As...>& __x, const _SimdTuple<_Tp, _As...>& __y) \ 1525 { \ 1526 return __x._M_apply_per_chunk( \ 1527 [](auto __impl, auto __xx, auto __yy) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA { \ 1528 return __impl.name_(__xx, __yy); \ 1529 }, \ 1530 __y); \ 1531 } 1532 1533 _GLIBCXX_SIMD_FIXED_OP(_S_plus, +) 1534 _GLIBCXX_SIMD_FIXED_OP(_S_minus, -) 1535 _GLIBCXX_SIMD_FIXED_OP(_S_multiplies, *) 1536 _GLIBCXX_SIMD_FIXED_OP(_S_divides, /) 1537 _GLIBCXX_SIMD_FIXED_OP(_S_modulus, %) 1538 _GLIBCXX_SIMD_FIXED_OP(_S_bit_and, &) 1539 _GLIBCXX_SIMD_FIXED_OP(_S_bit_or, |) 1540 _GLIBCXX_SIMD_FIXED_OP(_S_bit_xor, ^) 1541 _GLIBCXX_SIMD_FIXED_OP(_S_bit_shift_left, <<) 1542 _GLIBCXX_SIMD_FIXED_OP(_S_bit_shift_right, >>) 1543 #undef _GLIBCXX_SIMD_FIXED_OP 1544 1545 template
1546 _GLIBCXX_SIMD_INTRINSIC static constexpr _SimdTuple<_Tp, _As...> 1547 _S_bit_shift_left(const _SimdTuple<_Tp, _As...>& __x, int __y) 1548 { 1549 return __x._M_apply_per_chunk( 1550 [__y](auto __impl, auto __xx) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA { 1551 return __impl._S_bit_shift_left(__xx, __y); 1552 }); 1553 } 1554 1555 template
1556 _GLIBCXX_SIMD_INTRINSIC static constexpr _SimdTuple<_Tp, _As...> 1557 _S_bit_shift_right(const _SimdTuple<_Tp, _As...>& __x, int __y) 1558 { 1559 return __x._M_apply_per_chunk( 1560 [__y](auto __impl, auto __xx) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA { 1561 return __impl._S_bit_shift_right(__xx, __y); 1562 }); 1563 } 1564 1565 // math {{{2 1566 #define _GLIBCXX_SIMD_APPLY_ON_TUPLE(_RetTp, __name) \ 1567 template
\ 1568 static inline __fixed_size_storage_t<_RetTp, _Np> \ 1569 _S_##__name(const _SimdTuple<_Tp, _As...>& __x, \ 1570 const _More&... __more) \ 1571 { \ 1572 if constexpr (sizeof...(_More) == 0) \ 1573 { \ 1574 if constexpr (is_same_v<_Tp, _RetTp>) \ 1575 return __x._M_apply_per_chunk( \ 1576 [](auto __impl, auto __xx) \ 1577 constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA \ 1578 { \ 1579 using _V = typename decltype(__impl)::simd_type; \ 1580 return __data(__name(_V(__private_init, __xx))); \ 1581 }); \ 1582 else \ 1583 return __optimize_simd_tuple( \ 1584 __x.template _M_apply_r<_RetTp>( \ 1585 [](auto __impl, auto __xx) \ 1586 _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA \ 1587 { return __impl._S_##__name(__xx); })); \ 1588 } \ 1589 else if constexpr ( \ 1590 is_same_v< \ 1591 _Tp, \ 1592 _RetTp> && (... && is_same_v<_SimdTuple<_Tp, _As...>, _More>) ) \ 1593 return __x._M_apply_per_chunk( \ 1594 [](auto __impl, auto __xx, auto... __pack) \ 1595 constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA \ 1596 { \ 1597 using _V = typename decltype(__impl)::simd_type; \ 1598 return __data(__name(_V(__private_init, __xx), \ 1599 _V(__private_init, __pack)...)); \ 1600 }, __more...); \ 1601 else if constexpr (is_same_v<_Tp, _RetTp>) \ 1602 return __x._M_apply_per_chunk( \ 1603 [](auto __impl, auto __xx, auto... __pack) \ 1604 constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA \ 1605 { \ 1606 using _V = typename decltype(__impl)::simd_type; \ 1607 return __data(__name(_V(__private_init, __xx), \ 1608 __autocvt_to_simd(__pack)...)); \ 1609 }, __more...); \ 1610 else \ 1611 __assert_unreachable<_Tp>(); \ 1612 } 1613 1614 _GLIBCXX_SIMD_APPLY_ON_TUPLE(_Tp, acos) 1615 _GLIBCXX_SIMD_APPLY_ON_TUPLE(_Tp, asin) 1616 _GLIBCXX_SIMD_APPLY_ON_TUPLE(_Tp, atan) 1617 _GLIBCXX_SIMD_APPLY_ON_TUPLE(_Tp, atan2) 1618 _GLIBCXX_SIMD_APPLY_ON_TUPLE(_Tp, cos) 1619 _GLIBCXX_SIMD_APPLY_ON_TUPLE(_Tp, sin) 1620 _GLIBCXX_SIMD_APPLY_ON_TUPLE(_Tp, tan) 1621 _GLIBCXX_SIMD_APPLY_ON_TUPLE(_Tp, acosh) 1622 _GLIBCXX_SIMD_APPLY_ON_TUPLE(_Tp, asinh) 1623 _GLIBCXX_SIMD_APPLY_ON_TUPLE(_Tp, atanh) 1624 _GLIBCXX_SIMD_APPLY_ON_TUPLE(_Tp, cosh) 1625 _GLIBCXX_SIMD_APPLY_ON_TUPLE(_Tp, sinh) 1626 _GLIBCXX_SIMD_APPLY_ON_TUPLE(_Tp, tanh) 1627 _GLIBCXX_SIMD_APPLY_ON_TUPLE(_Tp, exp) 1628 _GLIBCXX_SIMD_APPLY_ON_TUPLE(_Tp, exp2) 1629 _GLIBCXX_SIMD_APPLY_ON_TUPLE(_Tp, expm1) 1630 _GLIBCXX_SIMD_APPLY_ON_TUPLE(int, ilogb) 1631 _GLIBCXX_SIMD_APPLY_ON_TUPLE(_Tp, log) 1632 _GLIBCXX_SIMD_APPLY_ON_TUPLE(_Tp, log10) 1633 _GLIBCXX_SIMD_APPLY_ON_TUPLE(_Tp, log1p) 1634 _GLIBCXX_SIMD_APPLY_ON_TUPLE(_Tp, log2) 1635 _GLIBCXX_SIMD_APPLY_ON_TUPLE(_Tp, logb) 1636 // modf implemented in simd_math.h 1637 _GLIBCXX_SIMD_APPLY_ON_TUPLE(_Tp, 1638 scalbn) // double scalbn(double x, int exp); 1639 _GLIBCXX_SIMD_APPLY_ON_TUPLE(_Tp, scalbln) 1640 _GLIBCXX_SIMD_APPLY_ON_TUPLE(_Tp, cbrt) 1641 _GLIBCXX_SIMD_APPLY_ON_TUPLE(_Tp, abs) 1642 _GLIBCXX_SIMD_APPLY_ON_TUPLE(_Tp, fabs) 1643 _GLIBCXX_SIMD_APPLY_ON_TUPLE(_Tp, pow) 1644 _GLIBCXX_SIMD_APPLY_ON_TUPLE(_Tp, sqrt) 1645 _GLIBCXX_SIMD_APPLY_ON_TUPLE(_Tp, erf) 1646 _GLIBCXX_SIMD_APPLY_ON_TUPLE(_Tp, erfc) 1647 _GLIBCXX_SIMD_APPLY_ON_TUPLE(_Tp, lgamma) 1648 _GLIBCXX_SIMD_APPLY_ON_TUPLE(_Tp, tgamma) 1649 _GLIBCXX_SIMD_APPLY_ON_TUPLE(_Tp, trunc) 1650 _GLIBCXX_SIMD_APPLY_ON_TUPLE(_Tp, ceil) 1651 _GLIBCXX_SIMD_APPLY_ON_TUPLE(_Tp, floor) 1652 _GLIBCXX_SIMD_APPLY_ON_TUPLE(_Tp, nearbyint) 1653 1654 _GLIBCXX_SIMD_APPLY_ON_TUPLE(_Tp, rint) 1655 _GLIBCXX_SIMD_APPLY_ON_TUPLE(long, lrint) 1656 _GLIBCXX_SIMD_APPLY_ON_TUPLE(long long, llrint) 1657 1658 
_GLIBCXX_SIMD_APPLY_ON_TUPLE(_Tp, round) 1659 _GLIBCXX_SIMD_APPLY_ON_TUPLE(long, lround) 1660 _GLIBCXX_SIMD_APPLY_ON_TUPLE(long long, llround) 1661 1662 _GLIBCXX_SIMD_APPLY_ON_TUPLE(_Tp, ldexp) 1663 _GLIBCXX_SIMD_APPLY_ON_TUPLE(_Tp, fmod) 1664 _GLIBCXX_SIMD_APPLY_ON_TUPLE(_Tp, remainder) 1665 // copysign in simd_math.h 1666 _GLIBCXX_SIMD_APPLY_ON_TUPLE(_Tp, nextafter) 1667 _GLIBCXX_SIMD_APPLY_ON_TUPLE(_Tp, fdim) 1668 _GLIBCXX_SIMD_APPLY_ON_TUPLE(_Tp, fmax) 1669 _GLIBCXX_SIMD_APPLY_ON_TUPLE(_Tp, fmin) 1670 _GLIBCXX_SIMD_APPLY_ON_TUPLE(_Tp, fma) 1671 _GLIBCXX_SIMD_APPLY_ON_TUPLE(int, fpclassify) 1672 #undef _GLIBCXX_SIMD_APPLY_ON_TUPLE 1673 1674 template
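Each _GLIBCXX_SIMD_APPLY_ON_TUPLE line above generates a chunk-wise forwarding wrapper for one <cmath> function; the (int, ...), (long, ...) and (long long, ...) lines change the element type of the result accordingly. Seen from the public API (a usage sketch, element count arbitrary):

#include <experimental/simd>

namespace stdx = std::experimental;

void math_example()
{
  stdx::fixed_size_simd<double, 6> x([](int i) { return 0.1 * i; });
  auto s = stdx::sin(x);   // element type stays double: generated by the (_Tp, sin) line
  auto r = stdx::lrint(x); // element type becomes long: generated by the (long, lrint) line
  (void) s;
  (void) r;
}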
1675 static inline _SimdTuple<_Tp, _Abis...> 1676 _S_remquo(const _SimdTuple<_Tp, _Abis...>& __x, const _SimdTuple<_Tp, _Abis...>& __y, 1677 __fixed_size_storage_t
::_S_size()>* __z) 1678 { 1679 return __x._M_apply_per_chunk( 1680 [](auto __impl, const auto __xx, const auto __yy, auto& __zz) 1681 _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA 1682 { return __impl._S_remquo(__xx, __yy, &__zz); }, 1683 __y, *__z); 1684 } 1685 1686 template
1687 static inline _SimdTuple<_Tp, _As...> 1688 _S_frexp(const _SimdTuple<_Tp, _As...>& __x, 1689 __fixed_size_storage_t
& __exp) noexcept 1690 { 1691 return __x._M_apply_per_chunk( 1692 [](auto __impl, const auto& __a, auto& __b) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA { 1693 return __data(frexp(typename decltype(__impl)::simd_type(__private_init, __a), 1694 __autocvt_to_simd(__b))); 1695 }, __exp); 1696 } 1697 1698 #define _GLIBCXX_SIMD_TEST_ON_TUPLE_(name_) \ 1699 template
\ 1700 static inline _MaskMember \ 1701 _S_##name_(const _SimdTuple<_Tp, _As...>& __x) noexcept \ 1702 { \ 1703 return _M_test([] (auto __impl, auto __xx) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA { \ 1704 return __impl._S_##name_(__xx); \ 1705 }, __x); \ 1706 } 1707 1708 _GLIBCXX_SIMD_TEST_ON_TUPLE_(isinf) 1709 _GLIBCXX_SIMD_TEST_ON_TUPLE_(isfinite) 1710 _GLIBCXX_SIMD_TEST_ON_TUPLE_(isnan) 1711 _GLIBCXX_SIMD_TEST_ON_TUPLE_(isnormal) 1712 _GLIBCXX_SIMD_TEST_ON_TUPLE_(signbit) 1713 #undef _GLIBCXX_SIMD_TEST_ON_TUPLE_ 1714 1715 // _S_increment & _S_decrement{{{2 1716 template
1717 _GLIBCXX_SIMD_INTRINSIC static constexpr void 1718 _S_increment(_SimdTuple<_Ts...>& __x) 1719 { 1720 __for_each( 1721 __x, [](auto __meta, auto& native) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA { 1722 __meta._S_increment(native); 1723 }); 1724 } 1725 1726 template
1727 _GLIBCXX_SIMD_INTRINSIC static constexpr void 1728 _S_decrement(_SimdTuple<_Ts...>& __x) 1729 { 1730 __for_each( 1731 __x, [](auto __meta, auto& native) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA { 1732 __meta._S_decrement(native); 1733 }); 1734 } 1735 1736 // compares {{{2 1737 #define _GLIBCXX_SIMD_CMP_OPERATIONS(__cmp) \ 1738 template
\ 1739 _GLIBCXX_SIMD_INTRINSIC static constexpr _MaskMember \ 1740 __cmp(const _SimdTuple<_Tp, _As...>& __x, \ 1741 const _SimdTuple<_Tp, _As...>& __y) \ 1742 { \ 1743 return _M_test([](auto __impl, auto __xx, auto __yy) \ 1744 constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA \ 1745 { return __impl.__cmp(__xx, __yy); }, \ 1746 __x, __y); \ 1747 } 1748 1749 _GLIBCXX_SIMD_CMP_OPERATIONS(_S_equal_to) 1750 _GLIBCXX_SIMD_CMP_OPERATIONS(_S_not_equal_to) 1751 _GLIBCXX_SIMD_CMP_OPERATIONS(_S_less) 1752 _GLIBCXX_SIMD_CMP_OPERATIONS(_S_less_equal) 1753 _GLIBCXX_SIMD_CMP_OPERATIONS(_S_isless) 1754 _GLIBCXX_SIMD_CMP_OPERATIONS(_S_islessequal) 1755 _GLIBCXX_SIMD_CMP_OPERATIONS(_S_isgreater) 1756 _GLIBCXX_SIMD_CMP_OPERATIONS(_S_isgreaterequal) 1757 _GLIBCXX_SIMD_CMP_OPERATIONS(_S_islessgreater) 1758 _GLIBCXX_SIMD_CMP_OPERATIONS(_S_isunordered) 1759 #undef _GLIBCXX_SIMD_CMP_OPERATIONS 1760 1761 // smart_reference access {{{2 1762 template
      _GLIBCXX_SIMD_INTRINSIC static constexpr void
      _S_set(_SimdTuple<_Tp, _As...>& __v, int __i, _Up&& __x) noexcept
      { __v._M_set(__i, static_cast<_Up&&>(__x)); }

    // _S_masked_assign {{{2
    template <typename _Tp, typename... _As>
      _GLIBCXX_SIMD_INTRINSIC static constexpr void
      _S_masked_assign(const _MaskMember __bits, _SimdTuple<_Tp, _As...>& __lhs,
                       const __type_identity_t<_SimdTuple<_Tp, _As...>>& __rhs)
      {
        __for_each(__lhs, __rhs,
                   [&](auto __meta, auto& __native_lhs, auto __native_rhs)
                     constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA
                   {
                     __meta._S_masked_assign(__meta._S_make_mask(__bits), __native_lhs,
                                             __native_rhs);
                   });
      }

    // Optimization for the case where the RHS is a scalar. No need to broadcast
    // the scalar to a simd first.
    template <typename _Tp, typename... _As>
      _GLIBCXX_SIMD_INTRINSIC static constexpr void
      _S_masked_assign(const _MaskMember __bits, _SimdTuple<_Tp, _As...>& __lhs,
                       const __type_identity_t<_Tp> __rhs)
      {
        __for_each(
          __lhs, [&](auto __meta, auto& __native_lhs) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
            __meta._S_masked_assign(__meta._S_make_mask(__bits), __native_lhs,
                                    __rhs);
          });
      }

    // _S_masked_cassign {{{2
    template <typename _Op, typename _Tp, typename... _As>
      static constexpr inline void
      _S_masked_cassign(const _MaskMember __bits, _SimdTuple<_Tp, _As...>& __lhs,
                        const _SimdTuple<_Tp, _As...>& __rhs, _Op __op)
      {
        __for_each(__lhs, __rhs,
                   [&](auto __meta, auto& __native_lhs, auto __native_rhs)
                     constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA
                   {
                     __meta.template _S_masked_cassign(__meta._S_make_mask(__bits),
                                                       __native_lhs, __native_rhs, __op);
                   });
      }

    // Optimization for the case where the RHS is a scalar. No need to broadcast
    // the scalar to a simd first.
    template <typename _Op, typename _Tp, typename... _As>
      static constexpr inline void
      _S_masked_cassign(const _MaskMember __bits, _SimdTuple<_Tp, _As...>& __lhs,
                        const _Tp& __rhs, _Op __op)
      {
        __for_each(
          __lhs, [&](auto __meta, auto& __native_lhs) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
            __meta.template _S_masked_cassign(__meta._S_make_mask(__bits),
                                              __native_lhs, __rhs, __op);
          });
      }

    // _S_masked_unary {{{2
    template <template <typename> class _Op, typename _Tp, typename... _As>
      static constexpr inline _SimdTuple<_Tp, _As...>
      _S_masked_unary(const _MaskMember __bits,
                      const _SimdTuple<_Tp, _As...> __v) // TODO: const-ref __v?
      {
        return __v._M_apply_wrapped([&__bits](auto __meta,
                                              auto __native) constexpr {
          return __meta.template _S_masked_unary<_Op>(__meta._S_make_mask(
                                                        __bits),
                                                      __native);
        });
      }

    // }}}2
  };

// _MaskImplFixedSize {{{1
template <int _Np, typename>
  struct _MaskImplFixedSize
  {
    static_assert(
      sizeof(_ULLong) * __CHAR_BIT__ >= _Np,
      "The fixed_size implementation relies on one _ULLong being able to store "
      "all boolean elements."); // required in load & store

    // member types {{{
    using _Abi = simd_abi::fixed_size<_Np>;

    using _MaskMember = _SanitizedBitMask<_Np>;

    template <typename _Tp>
      using _FirstAbi = typename __fixed_size_storage_t<_Tp, _Np>::_FirstAbi;

    template <typename _Tp>
      using _TypeTag = _Tp*;

    // }}}
    // _S_broadcast {{{
    template <typename>
      _GLIBCXX_SIMD_INTRINSIC static constexpr _MaskMember
      _S_broadcast(bool __x)
      { return __x ? ~_MaskMember() : _MaskMember(); }

    // }}}
    // _S_load {{{
    template <typename>
      _GLIBCXX_SIMD_INTRINSIC static constexpr _MaskMember
      _S_load(const bool* __mem)
      {
        if (__builtin_is_constant_evaluated())
          {
            _MaskMember __r{};
            for (size_t __i = 0; __i < _Np; ++__i)
              __r.set(__i, __mem[__i]);
            return __r;
          }
        using _Ip = __int_for_sizeof_t<bool>;
        // the following load uses element_aligned and relies on __mem already
        // carrying alignment information from when this load function was
        // called.
        const simd<_Ip, _Abi> __bools(reinterpret_cast<const __may_alias<_Ip>*>(
                                        __mem),
                                      element_aligned);
        return __data(__bools != 0);
      }

    // }}}
    // _S_to_bits {{{
    template <bool _Sanitized>
      _GLIBCXX_SIMD_INTRINSIC static constexpr _SanitizedBitMask<_Np>
      _S_to_bits(_BitMask<_Np, _Sanitized> __x)
      {
        if constexpr (_Sanitized)
          return __x;
        else
          return __x._M_sanitized();
      }

    // }}}
    // _S_convert {{{
    template <typename _Tp, typename _Up, typename _UAbi>
      _GLIBCXX_SIMD_INTRINSIC static constexpr _MaskMember
      _S_convert(simd_mask<_Up, _UAbi> __x)
      {
        return _UAbi::_MaskImpl::_S_to_bits(__data(__x))
                 .template _M_extract<0, _Np>();
      }

    // }}}
    // _S_from_bitmask {{{2
    template <typename _Tp>
      _GLIBCXX_SIMD_INTRINSIC static constexpr _MaskMember
      _S_from_bitmask(_MaskMember __bits, _TypeTag<_Tp>) noexcept
      { return __bits; }

    // _S_load {{{2
    static constexpr inline _MaskMember
    _S_load(const bool* __mem) noexcept
    {
      // TODO: _UChar is not necessarily the best type to use here. For smaller
      // _Np _UShort, _UInt, _ULLong, float, and double can be more efficient.
      _ULLong __r = 0;
      using _Vs = __fixed_size_storage_t<_UChar, _Np>;
      __for_each(_Vs{}, [&](auto __meta, auto) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
        __r |= __meta._S_mask_to_shifted_ullong(
                 __meta._S_mask_impl._S_load(&__mem[__meta._S_offset],
                                             _SizeConstant<__meta._S_size()>()));
      });
      return __r;
    }

    // _S_masked_load {{{2
    static constexpr inline _MaskMember
    _S_masked_load(_MaskMember __merge, _MaskMember __mask, const bool* __mem) noexcept
    {
      _BitOps::_S_bit_iteration(__mask.to_ullong(),
                                [&](auto __i) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
                                  __merge.set(__i, __mem[__i]);
                                });
      return __merge;
    }

    // _S_store {{{2
    static constexpr inline void
    _S_store(const _MaskMember __bitmask, bool* __mem) noexcept
    {
      if constexpr (_Np == 1)
        __mem[0] = __bitmask[0];
      else
        _FirstAbi<_UChar>::_CommonImpl::_S_store_bool_array(__bitmask, __mem);
    }

    // _S_masked_store {{{2
    static constexpr inline void
    _S_masked_store(const _MaskMember __v, bool* __mem, const _MaskMember __k) noexcept
    {
      _BitOps::_S_bit_iteration(
        __k, [&](auto __i) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA { __mem[__i] = __v[__i]; });
    }

    // logical and bitwise operators {{{2
    _GLIBCXX_SIMD_INTRINSIC static constexpr _MaskMember
    _S_logical_and(const _MaskMember& __x, const _MaskMember& __y) noexcept
    { return __x & __y; }

    _GLIBCXX_SIMD_INTRINSIC static constexpr _MaskMember
    _S_logical_or(const _MaskMember& __x, const _MaskMember& __y) noexcept
    { return __x | __y; }

    _GLIBCXX_SIMD_INTRINSIC static constexpr _MaskMember
    _S_bit_not(const _MaskMember& __x) noexcept
    { return ~__x; }

    _GLIBCXX_SIMD_INTRINSIC static constexpr _MaskMember
    _S_bit_and(const _MaskMember& __x, const _MaskMember& __y) noexcept
    { return __x & __y; }

    _GLIBCXX_SIMD_INTRINSIC static constexpr _MaskMember
    _S_bit_or(const _MaskMember& __x, const _MaskMember& __y) noexcept
    { return __x | __y; }

    _GLIBCXX_SIMD_INTRINSIC static constexpr _MaskMember
    _S_bit_xor(const _MaskMember& __x, const _MaskMember& __y) noexcept
    { return __x ^ __y; }

    // smart_reference access {{{2
    _GLIBCXX_SIMD_INTRINSIC static constexpr void
    _S_set(_MaskMember& __k, int __i, bool __x) noexcept
    { __k.set(__i, __x); }

    // _S_masked_assign {{{2
    _GLIBCXX_SIMD_INTRINSIC static constexpr void
    _S_masked_assign(const _MaskMember __k, _MaskMember& __lhs, const _MaskMember __rhs)
    { __lhs = (__lhs & ~__k) | (__rhs & __k); }

    // Optimization for the case where the RHS is a scalar.
    _GLIBCXX_SIMD_INTRINSIC static constexpr void
    _S_masked_assign(const _MaskMember __k, _MaskMember& __lhs, const bool __rhs)
    {
      if (__rhs)
        __lhs |= __k;
      else
        __lhs &= ~__k;
    }

    // }}}2
    // _S_all_of {{{
    template <typename _Tp>
      _GLIBCXX_SIMD_INTRINSIC static constexpr bool
      _S_all_of(simd_mask<_Tp, _Abi> __k)
      { return __data(__k).all(); }

    // }}}
    // _S_any_of {{{
    template <typename _Tp>
      _GLIBCXX_SIMD_INTRINSIC static constexpr bool
      _S_any_of(simd_mask<_Tp, _Abi> __k)
      { return __data(__k).any(); }

    // }}}
    // _S_none_of {{{
    template <typename _Tp>
      _GLIBCXX_SIMD_INTRINSIC static constexpr bool
      _S_none_of(simd_mask<_Tp, _Abi> __k)
      { return __data(__k).none(); }

    // }}}
    // _S_some_of {{{
    template <typename _Tp>
      _GLIBCXX_SIMD_INTRINSIC static constexpr bool
      _S_some_of([[maybe_unused]] simd_mask<_Tp, _Abi> __k)
      {
        if constexpr (_Np == 1)
          return false;
        else
          return __data(__k).any() && !__data(__k).all();
      }

    // }}}
    // _S_popcount {{{
    template <typename _Tp>
      _GLIBCXX_SIMD_INTRINSIC static constexpr int
      _S_popcount(simd_mask<_Tp, _Abi> __k)
      { return __data(__k).count(); }

    // }}}
    // _S_find_first_set {{{
    template <typename _Tp>
      _GLIBCXX_SIMD_INTRINSIC static constexpr int
      _S_find_first_set(simd_mask<_Tp, _Abi> __k)
      { return std::__countr_zero(__data(__k).to_ullong()); }

    // }}}
    // _S_find_last_set {{{
    template <typename _Tp>
      _GLIBCXX_SIMD_INTRINSIC static constexpr int
      _S_find_last_set(simd_mask<_Tp, _Abi> __k)
      { return std::__bit_width(__data(__k).to_ullong()) - 1; }

    // }}}
  };
// }}}1

_GLIBCXX_SIMD_END_NAMESPACE
#endif // __cplusplus >= 201703L
#endif // _GLIBCXX_EXPERIMENTAL_SIMD_FIXED_SIZE_H_

// vim: foldmethod=marker sw=2 noet ts=8 sts=2 tw=80
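The code above is internal machinery; user programs reach it through the public std::experimental::simd interface declared in <experimental/simd>. The following short sketch is written for this page (it is not part of the GCC sources) and only illustrates which of the paths above a fixed_size simd exercises; the lane count 8, the generated values, and the variable names are arbitrary. Built with g++ -std=c++17 it should print the number of negative lanes and the index of the first one. Note that find_first_set requires at least one set lane, which the chosen input guarantees.

    // Illustrative example (not part of the header): exercising the
    // fixed_size ABI through the public std::experimental::simd API.
    #include <experimental/simd>
    #include <cstdio>

    namespace stdx = std::experimental;

    int main()
    {
      // simd<float, fixed_size<8>> is stored as a _SimdTuple of native chunks;
      // the operations below are forwarded chunk-wise by _SimdImplFixedSize.
      using V = stdx::fixed_size_simd<float, 8>;
      using M = V::mask_type;

      // Generator constructor: lane i holds i - 3, so lanes 0..2 are negative.
      V x([](auto i) { return static_cast<float>(i) - 3.0f; });

      // round() is dispatched via _GLIBCXX_SIMD_APPLY_ON_TUPLE(_Tp, round).
      V r = stdx::round(x);

      // signbit() goes through the _GLIBCXX_SIMD_TEST_ON_TUPLE_ machinery and
      // yields a fixed_size simd_mask (a sanitized bitmask internally).
      M neg = stdx::signbit(x);

      // where() expressions end up in _S_masked_assign / _S_masked_cassign;
      // a scalar right-hand side avoids the broadcast, as the comments above note.
      stdx::where(neg, r) = 0.0f;
      stdx::where(neg, r) += 1.0f;

      // Mask reductions map onto _MaskImplFixedSize::_S_popcount and friends.
      std::printf("negative lanes: %d, first at index %d\n",
                  stdx::popcount(neg), stdx::find_first_set(neg));
    }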