The C and C++ Include Header Files
/usr/include/c++/13/experimental/bits/simd_fixed_size.h
$ cat -n /usr/include/c++/13/experimental/bits/simd_fixed_size.h
     1  // Simd fixed_size ABI specific implementations -*- C++ -*-
     2
     3  // Copyright (C) 2020-2023 Free Software Foundation, Inc.
     4  //
     5  // This file is part of the GNU ISO C++ Library. This library is free
     6  // software; you can redistribute it and/or modify it under the
     7  // terms of the GNU General Public License as published by the
     8  // Free Software Foundation; either version 3, or (at your option)
     9  // any later version.
    10
    11  // This library is distributed in the hope that it will be useful,
    12  // but WITHOUT ANY WARRANTY; without even the implied warranty of
    13  // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
    14  // GNU General Public License for more details.
    15
    16  // Under Section 7 of GPL version 3, you are granted additional
    17  // permissions described in the GCC Runtime Library Exception, version
    18  // 3.1, as published by the Free Software Foundation.
    19
    20  // You should have received a copy of the GNU General Public License and
    21  // a copy of the GCC Runtime Library Exception along with this program;
    22  // see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
    23  // <http://www.gnu.org/licenses/>.
    24
    25  /*
    26   * The fixed_size ABI gives the following guarantees:
    27   *  - simd objects are passed via the stack
    28   *  - memory layout of `simd<_Tp, _Np>` is equivalent to `array<_Tp, _Np>`
    29   *  - alignment of `simd<_Tp, _Np>` is `_Np * sizeof(_Tp)` if _Np is a
    30   *    power-of-2 value, otherwise `std::__bit_ceil(_Np * sizeof(_Tp))` (Note:
    31   *    if the alignment were to exceed the system/compiler maximum, it is bounded
    32   *    to that maximum)
    33   *  - simd_mask objects are passed like bitset<_Np>
    34   *  - memory layout of `simd_mask<_Tp, _Np>` is equivalent to `bitset<_Np>`
    35   *  - alignment of `simd_mask<_Tp, _Np>` is equal to the alignment of
    36   *    `bitset<_Np>`
    37   */
    38
    39  #ifndef _GLIBCXX_EXPERIMENTAL_SIMD_FIXED_SIZE_H_
    40  #define _GLIBCXX_EXPERIMENTAL_SIMD_FIXED_SIZE_H_
    41
    42  #if __cplusplus >= 201703L
    43
    44  #include <array>
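
The comment block above is the contract that the rest of this header implements. As a standalone illustration (not part of the header), the following sketch assumes GCC's <experimental/simd> on a typical x86-64 build; the padding and alignment it reports are whatever the documented __bit_ceil formula yields there, not a portable guarantee.

    #include <experimental/simd>
    #include <array>
    #include <cstdio>

    namespace stdx = std::experimental;

    int main()
    {
      // 7 floats are not a power-of-2 payload, so the fixed_size ABI pads up
      // to __bit_ceil(7 * sizeof(float)) bytes, per the comment above.
      using V = stdx::fixed_size_simd<float, 7>;

      V v([](int i) { return 1.0f * i; });   // generator constructor

      // The element layout matches array<float, 7>, so copy_to/copy_from move
      // exactly 7 floats regardless of the padding.
      std::array<float, 7> a{};
      v.copy_to(a.data(), stdx::element_aligned);

      std::printf("sizeof(V) = %zu, alignof(V) = %zu, a[6] = %g\n",
                  sizeof(V), alignof(V), a[6]);
    }
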
    45
    46  _GLIBCXX_SIMD_BEGIN_NAMESPACE
    47
    48  // __simd_tuple_element {{{
    49  template <size_t _I, typename _Tp>
    50    struct __simd_tuple_element;
    51
    52  template <typename _Tp, typename _A0, typename... _As>
    53    struct __simd_tuple_element<0, _SimdTuple<_Tp, _A0, _As...>>
    54    { using type = simd<_Tp, _A0>; };
    55
    56  template <size_t _I, typename _Tp, typename _A0, typename... _As>
    57    struct __simd_tuple_element<_I, _SimdTuple<_Tp, _A0, _As...>>
    58    { using type = typename __simd_tuple_element<_I - 1, _SimdTuple<_Tp, _As...>>::type; };
    59
    60  template <size_t _I, typename _Tp>
    61    using __simd_tuple_element_t = typename __simd_tuple_element<_I, _Tp>::type;
    62
    63  // }}}
    64  // __simd_tuple_concat {{{
    65
    66  template <typename _Tp, typename... _A0s, typename... _A1s>
67 _GLIBCXX_SIMD_INTRINSIC constexpr _SimdTuple<_Tp, _A0s..., _A1s...> 68 __simd_tuple_concat(const _SimdTuple<_Tp, _A0s...>& __left, 69 const _SimdTuple<_Tp, _A1s...>& __right) 70 { 71 if constexpr (sizeof...(_A0s) == 0) 72 return __right; 73 else if constexpr (sizeof...(_A1s) == 0) 74 return __left; 75 else 76 return {__left.first, __simd_tuple_concat(__left.second, __right)}; 77 } 78 79 template
80 _GLIBCXX_SIMD_INTRINSIC constexpr _SimdTuple<_Tp, simd_abi::scalar, _A10, _A1s...> 81 __simd_tuple_concat(const _Tp& __left, const _SimdTuple<_Tp, _A10, _A1s...>& __right) 82 { return {__left, __right}; } 83 84 // }}} 85 // __simd_tuple_pop_front {{{ 86 // Returns the next _SimdTuple in __x that has _Np elements less. 87 // Precondition: _Np must match the number of elements in __first (recursively) 88 template
89 _GLIBCXX_SIMD_INTRINSIC constexpr decltype(auto) 90 __simd_tuple_pop_front(_Tp&& __x) 91 { 92 if constexpr (_Np == 0) 93 return static_cast<_Tp&&>(__x); 94 else 95 { 96 using _Up = __remove_cvref_t<_Tp>; 97 static_assert(_Np >= _Up::_S_first_size); 98 return __simd_tuple_pop_front<_Np - _Up::_S_first_size>(__x.second); 99 } 100 } 101 102 // }}} 103 // __get_simd_at<_Np> {{{1 104 struct __as_simd {}; 105 106 struct __as_simd_tuple {}; 107 108 template
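
The helpers above (__simd_tuple_element, __simd_tuple_concat, __simd_tuple_pop_front) all walk a recursive "first chunk plus rest" structure at compile time. A self-contained model of the same technique, using illustrative names (MyTuple, tuple_element_at) rather than the library's internals:

    #include <cstddef>
    #include <type_traits>

    // Recursive pair: a "first" chunk plus the remaining chunks.
    template <typename First, typename... Rest>
    struct MyTuple
    {
      First first;
      MyTuple<Rest...> second;
    };

    template <typename First>
    struct MyTuple<First>
    { First first; };

    // Compile-time walk, mirroring __simd_tuple_element's recursion.
    template <std::size_t I, typename Tuple>
    struct tuple_element_at;

    template <typename First, typename... Rest>
    struct tuple_element_at<0, MyTuple<First, Rest...>>
    { using type = First; };

    template <std::size_t I, typename First, typename... Rest>
    struct tuple_element_at<I, MyTuple<First, Rest...>>
    { using type = typename tuple_element_at<I - 1, MyTuple<Rest...>>::type; };

    static_assert(
      std::is_same_v<tuple_element_at<1, MyTuple<int, float, double>>::type, float>);

    int main() {}
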
109 _GLIBCXX_SIMD_INTRINSIC constexpr simd<_Tp, _A0> 110 __simd_tuple_get_impl(__as_simd, const _SimdTuple<_Tp, _A0, _Abis...>& __t, _SizeConstant<0>) 111 { return {__private_init, __t.first}; } 112 113 template
114 _GLIBCXX_SIMD_INTRINSIC constexpr const auto& 115 __simd_tuple_get_impl(__as_simd_tuple, const _SimdTuple<_Tp, _A0, _Abis...>& __t, 116 _SizeConstant<0>) 117 { return __t.first; } 118 119 template
120 _GLIBCXX_SIMD_INTRINSIC constexpr auto& 121 __simd_tuple_get_impl(__as_simd_tuple, _SimdTuple<_Tp, _A0, _Abis...>& __t, _SizeConstant<0>) 122 { return __t.first; } 123 124 template
125 _GLIBCXX_SIMD_INTRINSIC constexpr auto 126 __simd_tuple_get_impl(_R, const _SimdTuple<_Tp, _Abis...>& __t, _SizeConstant<_Np>) 127 { return __simd_tuple_get_impl(_R(), __t.second, _SizeConstant<_Np - 1>()); } 128 129 template
130 _GLIBCXX_SIMD_INTRINSIC constexpr auto& 131 __simd_tuple_get_impl(__as_simd_tuple, _SimdTuple<_Tp, _Abis...>& __t, _SizeConstant<_Np>) 132 { return __simd_tuple_get_impl(__as_simd_tuple(), __t.second, _SizeConstant<_Np - 1>()); } 133 134 template
135 _GLIBCXX_SIMD_INTRINSIC constexpr auto 136 __get_simd_at(const _SimdTuple<_Tp, _Abis...>& __t) 137 { return __simd_tuple_get_impl(__as_simd(), __t, _SizeConstant<_Np>()); } 138 139 // }}} 140 // __get_tuple_at<_Np> {{{ 141 template
142 _GLIBCXX_SIMD_INTRINSIC constexpr auto 143 __get_tuple_at(const _SimdTuple<_Tp, _Abis...>& __t) 144 { return __simd_tuple_get_impl(__as_simd_tuple(), __t, _SizeConstant<_Np>()); } 145 146 template
147 _GLIBCXX_SIMD_INTRINSIC constexpr auto& 148 __get_tuple_at(_SimdTuple<_Tp, _Abis...>& __t) 149 { return __simd_tuple_get_impl(__as_simd_tuple(), __t, _SizeConstant<_Np>()); } 150 151 // __tuple_element_meta {{{1 152 template
153 struct __tuple_element_meta : public _Abi::_SimdImpl 154 { 155 static_assert(is_same_v
); // this fails e.g. when _SimdImpl is an 157 // alias for _SimdImplBuiltin<_DifferentAbi> 158 using value_type = _Tp; 159 using abi_type = _Abi; 160 using _Traits = _SimdTraits<_Tp, _Abi>; 161 using _MaskImpl = typename _Abi::_MaskImpl; 162 using _MaskMember = typename _Traits::_MaskMember; 163 using simd_type = simd<_Tp, _Abi>; 164 static constexpr size_t _S_offset = _Offset; 165 static constexpr size_t _S_size() { return simd_size<_Tp, _Abi>::value; } 166 static constexpr _MaskImpl _S_mask_impl = {}; 167 168 template
169 _GLIBCXX_SIMD_INTRINSIC static constexpr auto 170 _S_submask(_BitMask<_Np, _Sanitized> __bits) 171 { return __bits.template _M_extract<_Offset, _S_size()>(); } 172 173 template
174 _GLIBCXX_SIMD_INTRINSIC static constexpr _MaskMember 175 _S_make_mask(_BitMask<_Np, _Sanitized> __bits) 176 { 177 return _MaskImpl::template _S_convert<_Tp>( 178 __bits.template _M_extract<_Offset, _S_size()>()._M_sanitized()); 179 } 180 181 _GLIBCXX_SIMD_INTRINSIC static constexpr _ULLong 182 _S_mask_to_shifted_ullong(_MaskMember __k) 183 { return _MaskImpl::_S_to_bits(__k).to_ullong() << _Offset; } 184 }; 185 186 template
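
_S_submask and _S_mask_to_shifted_ullong slice the whole-simd bitmask down to the piece owned by one chunk and put a chunk-local mask back at its position. The same arithmetic on a plain unsigned long long, as a compile-time sketch (the real code goes through the _BitMask wrapper):

    // Extract the bits that belong to one chunk: bits [offset, offset + size).
    constexpr unsigned long long
    submask(unsigned long long bits, unsigned offset, unsigned size)
    { return (bits >> offset) & ((1ull << size) - 1); }

    // Shift a chunk-local mask back to its position in the whole-simd mask,
    // as _S_mask_to_shifted_ullong does.
    constexpr unsigned long long
    to_shifted(unsigned long long chunk_bits, unsigned offset)
    { return chunk_bits << offset; }

    int main()
    {
      // A 7-element mask split into a 4-wide chunk and a 3-wide chunk.
      constexpr unsigned long long full = 0b1011001;
      static_assert(submask(full, 0, 4) == 0b1001);
      static_assert(submask(full, 4, 3) == 0b101);
      static_assert((to_shifted(submask(full, 0, 4), 0)
                     | to_shifted(submask(full, 4, 3), 4)) == full);
    }
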
187 _GLIBCXX_SIMD_INTRINSIC constexpr 188 __tuple_element_meta<_Tp, _Abi, _Offset> 189 __make_meta(const _SimdTuple<_Tp, _Abi, _As...>&) 190 { return {}; } 191 192 // }}}1 193 // _WithOffset wrapper class {{{ 194 template
195 struct _WithOffset : public _Base 196 { 197 static inline constexpr size_t _S_offset = _Offset; 198 199 _GLIBCXX_SIMD_INTRINSIC char* 200 _M_as_charptr() 201 { return reinterpret_cast
(this) + _S_offset * sizeof(typename _Base::value_type); } 202 203 _GLIBCXX_SIMD_INTRINSIC const char* 204 _M_as_charptr() const 205 { return reinterpret_cast
(this) + _S_offset * sizeof(typename _Base::value_type); } 206 }; 207 208 // make _WithOffset<_WithOffset> ill-formed to use: 209 template
210 struct _WithOffset<_O0, _WithOffset<_O1, _Base>> {}; 211 212 template
213 _GLIBCXX_SIMD_INTRINSIC 214 decltype(auto) 215 __add_offset(_Tp& __base) 216 { return static_cast<_WithOffset<_Offset, __remove_cvref_t<_Tp>>&>(__base); } 217 218 template
219 _GLIBCXX_SIMD_INTRINSIC 220 decltype(auto) 221 __add_offset(const _Tp& __base) 222 { return static_cast
>&>(__base); } 223 224 template
225 _GLIBCXX_SIMD_INTRINSIC 226 decltype(auto) 227 __add_offset(_WithOffset<_ExistingOffset, _Tp>& __base) 228 { return static_cast<_WithOffset<_Offset + _ExistingOffset, _Tp>&>(static_cast<_Tp&>(__base)); } 229 230 template
231 _GLIBCXX_SIMD_INTRINSIC 232 decltype(auto) 233 __add_offset(const _WithOffset<_ExistingOffset, _Tp>& __base) 234 { 235 return static_cast
&>( 236 static_cast
(__base)); 237 } 238 239 template
240 constexpr inline size_t __offset = 0; 241 242 template
243 constexpr inline size_t __offset<_WithOffset<_Offset, _Tp>> 244 = _WithOffset<_Offset, _Tp>::_S_offset; 245 246 template
247 constexpr inline size_t __offset
= __offset<_Tp>; 248 249 template
250 constexpr inline size_t __offset<_Tp&> = __offset<_Tp>; 251 252 template
253 constexpr inline size_t __offset<_Tp&&> = __offset<_Tp>; 254 255 // }}} 256 // _SimdTuple specializations {{{1 257 // empty {{{2 258 template
259 struct _SimdTuple<_Tp> 260 { 261 using value_type = _Tp; 262 static constexpr size_t _S_tuple_size = 0; 263 static constexpr size_t _S_size() { return 0; } 264 }; 265 266 // _SimdTupleData {{{2 267 template
268 struct _SimdTupleData 269 { 270 _FirstType first; 271 _SecondType second; 272 273 _GLIBCXX_SIMD_INTRINSIC 274 constexpr bool 275 _M_is_constprop() const 276 { 277 if constexpr (is_class_v<_FirstType>) 278 return first._M_is_constprop() && second._M_is_constprop(); 279 else 280 return __builtin_constant_p(first) && second._M_is_constprop(); 281 } 282 }; 283 284 template
285 struct _SimdTupleData<_FirstType, _SimdTuple<_Tp>> 286 { 287 _FirstType first; 288 static constexpr _SimdTuple<_Tp> second = {}; 289 290 _GLIBCXX_SIMD_INTRINSIC 291 constexpr bool 292 _M_is_constprop() const 293 { 294 if constexpr (is_class_v<_FirstType>) 295 return first._M_is_constprop(); 296 else 297 return __builtin_constant_p(first); 298 } 299 }; 300 301 // 1 or more {{{2 302 template
303 struct _SimdTuple<_Tp, _Abi0, _Abis...> 304 : _SimdTupleData
::_SimdMember, 305 _SimdTuple<_Tp, _Abis...>> 306 { 307 static_assert(!__is_fixed_size_abi_v<_Abi0>); 308 using value_type = _Tp; 309 using _FirstType = typename _SimdTraits<_Tp, _Abi0>::_SimdMember; 310 using _FirstAbi = _Abi0; 311 using _SecondType = _SimdTuple<_Tp, _Abis...>; 312 static constexpr size_t _S_tuple_size = sizeof...(_Abis) + 1; 313 314 static constexpr size_t _S_size() 315 { return simd_size_v<_Tp, _Abi0> + _SecondType::_S_size(); } 316 317 static constexpr size_t _S_first_size = simd_size_v<_Tp, _Abi0>; 318 static constexpr bool _S_is_homogeneous = (is_same_v<_Abi0, _Abis> && ...); 319 320 using _Base = _SimdTupleData
::_SimdMember, 321 _SimdTuple<_Tp, _Abis...>>; 322 using _Base::first; 323 using _Base::second; 324 325 _GLIBCXX_SIMD_INTRINSIC constexpr _SimdTuple() = default; 326 _GLIBCXX_SIMD_INTRINSIC constexpr _SimdTuple(const _SimdTuple&) = default; 327 _GLIBCXX_SIMD_INTRINSIC constexpr _SimdTuple& operator=(const _SimdTuple&) 328 = default; 329 330 template
331 _GLIBCXX_SIMD_INTRINSIC constexpr 332 _SimdTuple(_Up&& __x) 333 : _Base{static_cast<_Up&&>(__x)} {} 334 335 template
336 _GLIBCXX_SIMD_INTRINSIC constexpr 337 _SimdTuple(_Up&& __x, _Up2&& __y) 338 : _Base{static_cast<_Up&&>(__x), static_cast<_Up2&&>(__y)} {} 339 340 template
341 _GLIBCXX_SIMD_INTRINSIC constexpr 342 _SimdTuple(_Up&& __x, _SimdTuple<_Tp>) 343 : _Base{static_cast<_Up&&>(__x)} {} 344 345 _GLIBCXX_SIMD_INTRINSIC char* 346 _M_as_charptr() 347 { return reinterpret_cast
(this); } 348 349 _GLIBCXX_SIMD_INTRINSIC const char* 350 _M_as_charptr() const 351 { return reinterpret_cast
(this); } 352 353 template
354 _GLIBCXX_SIMD_INTRINSIC constexpr auto& 355 _M_at() 356 { 357 if constexpr (_Np == 0) 358 return first; 359 else 360 return second.template _M_at<_Np - 1>(); 361 } 362 363 template
364 _GLIBCXX_SIMD_INTRINSIC constexpr const auto& 365 _M_at() const 366 { 367 if constexpr (_Np == 0) 368 return first; 369 else 370 return second.template _M_at<_Np - 1>(); 371 } 372 373 template
374 _GLIBCXX_SIMD_INTRINSIC constexpr auto 375 _M_simd_at() const 376 { 377 if constexpr (_Np == 0) 378 return simd<_Tp, _Abi0>(__private_init, first); 379 else 380 return second.template _M_simd_at<_Np - 1>(); 381 } 382 383 template
384 _GLIBCXX_SIMD_INTRINSIC static constexpr _SimdTuple 385 _S_generate(_Fp&& __gen, _SizeConstant<_Offset> = {}) 386 { 387 auto&& __first = __gen(__tuple_element_meta<_Tp, _Abi0, _Offset>()); 388 if constexpr (_S_tuple_size == 1) 389 return {__first}; 390 else 391 return {__first, 392 _SecondType::_S_generate( 393 static_cast<_Fp&&>(__gen), 394 _SizeConstant<_Offset + simd_size_v<_Tp, _Abi0>>())}; 395 } 396 397 template
398 _GLIBCXX_SIMD_INTRINSIC _SimdTuple 399 _M_apply_wrapped(_Fp&& __fun, const _More&... __more) const 400 { 401 auto&& __first 402 = __fun(__make_meta<_Offset>(*this), first, __more.first...); 403 if constexpr (_S_tuple_size == 1) 404 return {__first}; 405 else 406 return { 407 __first, 408 second.template _M_apply_wrapped<_Offset + simd_size_v<_Tp, _Abi0>>( 409 static_cast<_Fp&&>(__fun), __more.second...)}; 410 } 411 412 template
413 _GLIBCXX_SIMD_INTRINSIC constexpr decltype(auto) 414 _M_extract_argument(_Tup&& __tup) const 415 { 416 using _TupT = typename __remove_cvref_t<_Tup>::value_type; 417 if constexpr (is_same_v<_SimdTuple, __remove_cvref_t<_Tup>>) 418 return __tup.first; 419 else if (__builtin_is_constant_evaluated()) 420 return __fixed_size_storage_t<_TupT, _S_first_size>::_S_generate( 421 [&](auto __meta) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA { 422 return __meta._S_generator( 423 [&](auto __i) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA { 424 return __tup[__i]; 425 }, static_cast<_TupT*>(nullptr)); 426 }); 427 else 428 return [&]() { // not always_inline; allow the compiler to decide 429 __fixed_size_storage_t<_TupT, _S_first_size> __r; 430 __builtin_memcpy(__r._M_as_charptr(), __tup._M_as_charptr(), 431 sizeof(__r)); 432 return __r; 433 }(); 434 } 435 436 template
437 _GLIBCXX_SIMD_INTRINSIC constexpr auto& 438 _M_skip_argument(_Tup&& __tup) const 439 { 440 static_assert(_S_tuple_size > 1); 441 using _Up = __remove_cvref_t<_Tup>; 442 constexpr size_t __off = __offset<_Up>; 443 if constexpr (_S_first_size == _Up::_S_first_size && __off == 0) 444 return __tup.second; 445 else if constexpr (_S_first_size > _Up::_S_first_size 446 && _S_first_size % _Up::_S_first_size == 0 447 && __off == 0) 448 return __simd_tuple_pop_front<_S_first_size>(__tup); 449 else if constexpr (_S_first_size + __off < _Up::_S_first_size) 450 return __add_offset<_S_first_size>(__tup); 451 else if constexpr (_S_first_size + __off == _Up::_S_first_size) 452 return __tup.second; 453 else 454 __assert_unreachable<_Tup>(); 455 } 456 457 template
458 _GLIBCXX_SIMD_INTRINSIC constexpr void 459 _M_assign_front(const _SimdTuple<_Tp, _Abi0, _More...>& __x) & 460 { 461 static_assert(_Offset == 0); 462 first = __x.first; 463 if constexpr (sizeof...(_More) > 0) 464 { 465 static_assert(sizeof...(_Abis) >= sizeof...(_More)); 466 second.template _M_assign_front<0>(__x.second); 467 } 468 } 469 470 template
471 _GLIBCXX_SIMD_INTRINSIC constexpr void 472 _M_assign_front(const _FirstType& __x) & 473 { 474 static_assert(_Offset == 0); 475 first = __x; 476 } 477 478 template
479 _GLIBCXX_SIMD_INTRINSIC constexpr void 480 _M_assign_front(const _SimdTuple<_Tp, _As...>& __x) & 481 { 482 __builtin_memcpy(_M_as_charptr() + _Offset * sizeof(value_type), 483 __x._M_as_charptr(), 484 sizeof(_Tp) * _SimdTuple<_Tp, _As...>::_S_size()); 485 } 486 487 /* 488 * Iterate over the first objects in this _SimdTuple and call __fun for each 489 * of them. If additional arguments are passed via __more, chunk them into 490 * _SimdTuple or __vector_type_t objects of the same number of values. 491 */ 492 template
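
The comment above describes the chunk-wise iteration that _M_apply_per_chunk performs: call the functor once per stored chunk and hand it the matching chunk of every additional argument. A simplified, self-contained model of that idea (illustrative types only, and without the write-back handling for lvalue arguments):

    #include <array>
    #include <cstddef>
    #include <cstdio>
    #include <tuple>
    #include <utility>

    // A stand-in for _SimdTuple: 7 floats held as a 4-wide chunk, a 2-wide
    // chunk and a 1-wide tail, the way an SSE target might chop them up.
    using Chunks = std::tuple<std::array<float, 4>, std::array<float, 2>,
                              std::array<float, 1>>;

    template <typename F, std::size_t... Is>
    Chunks apply_impl(F&& fun, const Chunks& x, const Chunks& y,
                      std::index_sequence<Is...>)
    { return Chunks{fun(std::get<Is>(x), std::get<Is>(y))...}; }

    // Call fun once per chunk, together with the matching chunk of the second
    // argument -- the essence of _M_apply_per_chunk.
    template <typename F>
    Chunks apply_per_chunk(F&& fun, const Chunks& x, const Chunks& y)
    {
      return apply_impl(fun, x, y,
                        std::make_index_sequence<std::tuple_size_v<Chunks>>());
    }

    int main()
    {
      Chunks a{std::array<float, 4>{1, 2, 3, 4}, std::array<float, 2>{5, 6},
               std::array<float, 1>{7}};
      Chunks b{std::array<float, 4>{10, 20, 30, 40}, std::array<float, 2>{50, 60},
               std::array<float, 1>{70}};

      auto add = [](auto lhs, const auto& rhs) {
        for (std::size_t i = 0; i < lhs.size(); ++i)
          lhs[i] += rhs[i];
        return lhs;
      };

      Chunks c = apply_per_chunk(add, a, b);
      std::printf("%g %g %g\n", std::get<0>(c)[0], std::get<1>(c)[1],
                  std::get<2>(c)[0]);   // 11 66 77
    }
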
493 _GLIBCXX_SIMD_INTRINSIC constexpr _SimdTuple 494 _M_apply_per_chunk(_Fp&& __fun, _More&&... __more) const 495 { 496 if constexpr ((... 497 || conjunction_v< 498 is_lvalue_reference<_More>, 499 negation
>>>) ) 500 { 501 // need to write back at least one of __more after calling __fun 502 auto&& __first = [&](auto... __args) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA { 503 auto __r = __fun(__tuple_element_meta<_Tp, _Abi0, 0>(), first, 504 __args...); 505 [[maybe_unused]] auto&& __ignore_me = {( 506 [](auto&& __dst, const auto& __src) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA { 507 if constexpr (is_assignable_v
) 509 { 510 __dst.template _M_assign_front<__offset
>( 511 __src); 512 } 513 }(static_cast<_More&&>(__more), __args), 514 0)...}; 515 return __r; 516 }(_M_extract_argument(__more)...); 517 if constexpr (_S_tuple_size == 1) 518 return {__first}; 519 else 520 return {__first, 521 second._M_apply_per_chunk(static_cast<_Fp&&>(__fun), 522 _M_skip_argument(__more)...)}; 523 } 524 else 525 { 526 auto&& __first = __fun(__tuple_element_meta<_Tp, _Abi0, 0>(), first, 527 _M_extract_argument(__more)...); 528 if constexpr (_S_tuple_size == 1) 529 return {__first}; 530 else 531 return {__first, 532 second._M_apply_per_chunk(static_cast<_Fp&&>(__fun), 533 _M_skip_argument(__more)...)}; 534 } 535 } 536 537 template
538 _GLIBCXX_SIMD_INTRINSIC constexpr auto 539 _M_apply_r(_Fp&& __fun, const _More&... __more) const 540 { 541 auto&& __first = __fun(__tuple_element_meta<_Tp, _Abi0, 0>(), first, 542 __more.first...); 543 if constexpr (_S_tuple_size == 1) 544 return __first; 545 else 546 return __simd_tuple_concat<_R>( 547 __first, second.template _M_apply_r<_R>(static_cast<_Fp&&>(__fun), 548 __more.second...)); 549 } 550 551 template
552 _GLIBCXX_SIMD_INTRINSIC constexpr friend _SanitizedBitMask<_S_size()> 553 _M_test(const _Fp& __fun, const _SimdTuple& __x, const _More&... __more) 554 { 555 const _SanitizedBitMask<_S_first_size> __first 556 = _Abi0::_MaskImpl::_S_to_bits( 557 __fun(__tuple_element_meta<_Tp, _Abi0, 0>(), __x.first, 558 __more.first...)); 559 if constexpr (_S_tuple_size == 1) 560 return __first; 561 else 562 return _M_test(__fun, __x.second, __more.second...) 563 ._M_prepend(__first); 564 } 565 566 template
567 _GLIBCXX_SIMD_INTRINSIC constexpr _Tp 568 operator[](integral_constant<_Up, _I>) const noexcept 569 { 570 if constexpr (_I < simd_size_v<_Tp, _Abi0>) 571 return _M_subscript_read(_I); 572 else 573 return second[integral_constant<_Up, _I - simd_size_v<_Tp, _Abi0>>()]; 574 } 575 576 _GLIBCXX_SIMD_INTRINSIC constexpr _Tp 577 operator[](size_t __i) const noexcept 578 { 579 if constexpr (_S_tuple_size == 1) 580 return _M_subscript_read(__i); 581 #ifdef _GLIBCXX_SIMD_USE_ALIASING_LOADS 582 else if (not __builtin_is_constant_evaluated()) 583 return reinterpret_cast
*>(this)[__i]; 584 #endif 585 else if constexpr (__is_scalar_abi<_Abi0>()) 586 { 587 const _Tp* ptr = &first; 588 return ptr[__i]; 589 } 590 else 591 return __i < simd_size_v<_Tp, _Abi0> ? _M_subscript_read(__i) 592 : second[__i - simd_size_v<_Tp, _Abi0>]; 593 } 594 595 _GLIBCXX_SIMD_INTRINSIC constexpr void 596 _M_set(size_t __i, _Tp __val) noexcept 597 { 598 if constexpr (_S_tuple_size == 1) 599 return _M_subscript_write(__i, __val); 600 #ifdef _GLIBCXX_SIMD_USE_ALIASING_LOADS 601 else if (not __builtin_is_constant_evaluated()) 602 reinterpret_cast<__may_alias<_Tp>*>(this)[__i] = __val; 603 #endif 604 else if (__i < simd_size_v<_Tp, _Abi0>) 605 _M_subscript_write(__i, __val); 606 else 607 second._M_set(__i - simd_size_v<_Tp, _Abi0>, __val); 608 } 609 610 private: 611 // _M_subscript_read/_write {{{ 612 _GLIBCXX_SIMD_INTRINSIC constexpr _Tp 613 _M_subscript_read([[maybe_unused]] size_t __i) const noexcept 614 { 615 if constexpr (__is_vectorizable_v<_FirstType>) 616 return first; 617 else 618 return first[__i]; 619 } 620 621 _GLIBCXX_SIMD_INTRINSIC constexpr void 622 _M_subscript_write([[maybe_unused]] size_t __i, _Tp __y) noexcept 623 { 624 if constexpr (__is_vectorizable_v<_FirstType>) 625 first = __y; 626 else 627 first._M_set(__i, __y); 628 } 629 630 // }}} 631 }; 632 633 // __make_simd_tuple {{{1 634 template
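
operator[] and _M_set above translate a flat element index into the chunk that holds it (or, with _GLIBCXX_SIMD_USE_ALIASING_LOADS, read straight through memory). At the user level this is what indexing a fixed_size simd does; a small sketch, assuming GCC's <experimental/simd>:

    #include <experimental/simd>
    #include <cstdio>

    namespace stdx = std::experimental;

    int main()
    {
      stdx::fixed_size_simd<int, 9> v([](int i) { return i * i; });

      // Reads forward to _SimdTuple::operator[]: index 8 lands in whichever
      // trailing chunk holds the ninth element.
      std::printf("v[0] = %d, v[8] = %d\n", int(v[0]), int(v[8]));

      // Writes go through simd's smart reference, which ends up in _M_set.
      v[8] = -1;
      std::printf("after write: v[8] = %d\n", int(v[8]));
    }
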
635 _GLIBCXX_SIMD_INTRINSIC constexpr _SimdTuple<_Tp, _A0> 636 __make_simd_tuple(simd<_Tp, _A0> __x0) 637 { return {__data(__x0)}; } 638 639 template
640 _GLIBCXX_SIMD_INTRINSIC constexpr _SimdTuple<_Tp, _A0, _As...> 641 __make_simd_tuple(const simd<_Tp, _A0>& __x0, const simd<_Tp, _As>&... __xs) 642 { return {__data(__x0), __make_simd_tuple(__xs...)}; } 643 644 template
645 _GLIBCXX_SIMD_INTRINSIC constexpr _SimdTuple<_Tp, _A0> 646 __make_simd_tuple(const typename _SimdTraits<_Tp, _A0>::_SimdMember& __arg0) 647 { return {__arg0}; } 648 649 template
650 _GLIBCXX_SIMD_INTRINSIC constexpr _SimdTuple<_Tp, _A0, _A1, _Abis...> 651 __make_simd_tuple( 652 const typename _SimdTraits<_Tp, _A0>::_SimdMember& __arg0, 653 const typename _SimdTraits<_Tp, _A1>::_SimdMember& __arg1, 654 const typename _SimdTraits<_Tp, _Abis>::_SimdMember&... __args) 655 { return {__arg0, __make_simd_tuple<_Tp, _A1, _Abis...>(__arg1, __args...)}; } 656 657 // __to_simd_tuple {{{1 658 template
659 _GLIBCXX_SIMD_INTRINSIC constexpr __fixed_size_storage_t<_Tp, _Np> 660 __to_simd_tuple(const array<_V, _NV>& __from, const _VX... __fromX); 661 662 template
, typename _V0, 665 typename _V0VT = _VectorTraits<_V0>, typename... _VX> 666 _GLIBCXX_SIMD_INTRINSIC _R constexpr __to_simd_tuple(const _V0 __from0, const _VX... __fromX) 667 { 668 static_assert(is_same_v
); 669 static_assert(_Offset < _V0VT::_S_full_size); 670 using _R0 = __vector_type_t<_Tp, _R::_S_first_size>; 671 if constexpr (_R::_S_tuple_size == 1) 672 { 673 if constexpr (_Np == 1) 674 return _R{__from0[_Offset]}; 675 else if constexpr (_Offset == 0 && _V0VT::_S_full_size >= _Np) 676 return _R{__intrin_bitcast<_R0>(__from0)}; 677 else if constexpr (_Offset * 2 == _V0VT::_S_full_size 678 && _V0VT::_S_full_size / 2 >= _Np) 679 return _R{__intrin_bitcast<_R0>(__extract_part<1, 2>(__from0))}; 680 else if constexpr (_Offset * 4 == _V0VT::_S_full_size 681 && _V0VT::_S_full_size / 4 >= _Np) 682 return _R{__intrin_bitcast<_R0>(__extract_part<1, 4>(__from0))}; 683 else 684 __assert_unreachable<_Tp>(); 685 } 686 else 687 { 688 if constexpr (1 == _R::_S_first_size) 689 { // extract one scalar and recurse 690 if constexpr (_Offset + 1 < _V0VT::_S_full_size) 691 return _R{__from0[_Offset], 692 __to_simd_tuple<_Tp, _Np - 1, _Offset + 1>(__from0, 693 __fromX...)}; 694 else 695 return _R{__from0[_Offset], 696 __to_simd_tuple<_Tp, _Np - 1, 0>(__fromX...)}; 697 } 698 699 // place __from0 into _R::first and recurse for __fromX -> _R::second 700 else if constexpr (_V0VT::_S_full_size == _R::_S_first_size 701 && _Offset == 0) 702 return _R{__from0, 703 __to_simd_tuple<_Tp, _Np - _R::_S_first_size>(__fromX...)}; 704 705 // place lower part of __from0 into _R::first and recurse with _Offset 706 else if constexpr (_V0VT::_S_full_size > _R::_S_first_size 707 && _Offset == 0) 708 return _R{__intrin_bitcast<_R0>(__from0), 709 __to_simd_tuple<_Tp, _Np - _R::_S_first_size, 710 _R::_S_first_size>(__from0, __fromX...)}; 711 712 // place lower part of second quarter of __from0 into _R::first and 713 // recurse with _Offset 714 else if constexpr (_Offset * 4 == _V0VT::_S_full_size 715 && _V0VT::_S_full_size >= 4 * _R::_S_first_size) 716 return _R{__intrin_bitcast<_R0>(__extract_part<2, 4>(__from0)), 717 __to_simd_tuple<_Tp, _Np - _R::_S_first_size, 718 _Offset + _R::_S_first_size>(__from0, 719 __fromX...)}; 720 721 // place lower half of high half of __from0 into _R::first and recurse 722 // with _Offset 723 else if constexpr (_Offset * 2 == _V0VT::_S_full_size 724 && _V0VT::_S_full_size >= 4 * _R::_S_first_size) 725 return _R{__intrin_bitcast<_R0>(__extract_part<2, 4>(__from0)), 726 __to_simd_tuple<_Tp, _Np - _R::_S_first_size, 727 _Offset + _R::_S_first_size>(__from0, 728 __fromX...)}; 729 730 // place high half of __from0 into _R::first and recurse with __fromX 731 else if constexpr (_Offset * 2 == _V0VT::_S_full_size 732 && _V0VT::_S_full_size / 2 >= _R::_S_first_size) 733 return _R{__intrin_bitcast<_R0>(__extract_part<1, 2>(__from0)), 734 __to_simd_tuple<_Tp, _Np - _R::_S_first_size, 0>( 735 __fromX...)}; 736 737 // ill-formed if some unforseen pattern is needed 738 else 739 __assert_unreachable<_Tp>(); 740 } 741 } 742 743 template
744 _GLIBCXX_SIMD_INTRINSIC constexpr __fixed_size_storage_t<_Tp, _Np> 745 __to_simd_tuple(const array<_V, _NV>& __from, const _VX... __fromX) 746 { 747 if constexpr (is_same_v<_Tp, _V>) 748 { 749 static_assert( 750 sizeof...(_VX) == 0, 751 "An array of scalars must be the last argument to __to_simd_tuple"); 752 return __call_with_subscripts( 753 __from, make_index_sequence<_NV>(), 754 [&](const auto... __args) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA { 755 return __simd_tuple_concat( 756 _SimdTuple<_Tp, simd_abi::scalar>{__args}..., _SimdTuple<_Tp>()); 757 }); 758 } 759 else 760 return __call_with_subscripts( 761 __from, make_index_sequence<_NV>(), 762 [&](const auto... __args) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA { 763 return __to_simd_tuple<_Tp, _Np>(__args..., __fromX...); 764 }); 765 } 766 767 template
768 using __to_tuple_helper = _Tp; 769 770 template
772 _GLIBCXX_SIMD_INTRINSIC __fixed_size_storage_t<_Tp, _NOut> 773 __to_simd_tuple_impl(index_sequence<_Indexes...>, 774 const array<__vector_type_t<_Tp, simd_size_v<_Tp, _A0>>, _Np>& __args) 775 { 776 return __make_simd_tuple<_Tp, __to_tuple_helper<_Indexes, _A0>...>( 777 __args[_Indexes]...); 778 } 779 780 template
> 782 _GLIBCXX_SIMD_INTRINSIC _R 783 __to_simd_tuple_sized( 784 const array<__vector_type_t<_Tp, simd_size_v<_Tp, _A0>>, _Np>& __args) 785 { 786 static_assert(_Np * simd_size_v<_Tp, _A0> >= _NOut); 787 return __to_simd_tuple_impl<_Tp, _A0, _NOut>( 788 make_index_sequence<_R::_S_tuple_size>(), __args); 789 } 790 791 // __optimize_simd_tuple {{{1 792 template
793 _GLIBCXX_SIMD_INTRINSIC constexpr _SimdTuple<_Tp> 794 __optimize_simd_tuple(const _SimdTuple<_Tp>) 795 { return {}; } 796 797 template
798 _GLIBCXX_SIMD_INTRINSIC constexpr const _SimdTuple<_Tp, _Ap>& 799 __optimize_simd_tuple(const _SimdTuple<_Tp, _Ap>& __x) 800 { return __x; } 801 802 template
::_S_size()>> 805 _GLIBCXX_SIMD_INTRINSIC constexpr _R 806 __optimize_simd_tuple(const _SimdTuple<_Tp, _A0, _A1, _Abis...>& __x) 807 { 808 using _Tup = _SimdTuple<_Tp, _A0, _A1, _Abis...>; 809 if constexpr (is_same_v<_R, _Tup>) 810 return __x; 811 else if constexpr (is_same_v
) 813 return {__x.first, __optimize_simd_tuple(__x.second)}; 814 else if constexpr (__is_scalar_abi<_A0>() 815 || _A0::template _S_is_partial<_Tp>) 816 return {__generate_from_n_evaluations<_R::_S_first_size, 817 typename _R::_FirstType>( 818 [&](auto __i) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA { return __x[__i]; }), 819 __optimize_simd_tuple( 820 __simd_tuple_pop_front<_R::_S_first_size>(__x))}; 821 else if constexpr (is_same_v<_A0, _A1> 822 && _R::_S_first_size == simd_size_v<_Tp, _A0> + simd_size_v<_Tp, _A1>) 823 return {__concat(__x.template _M_at<0>(), __x.template _M_at<1>()), 824 __optimize_simd_tuple(__x.second.second)}; 825 else if constexpr (sizeof...(_Abis) >= 2 826 && _R::_S_first_size == (4 * simd_size_v<_Tp, _A0>) 827 && simd_size_v<_Tp, _A0> == __simd_tuple_element_t< 828 (sizeof...(_Abis) >= 2 ? 3 : 0), _Tup>::size()) 829 return { 830 __concat(__concat(__x.template _M_at<0>(), __x.template _M_at<1>()), 831 __concat(__x.template _M_at<2>(), __x.template _M_at<3>())), 832 __optimize_simd_tuple(__x.second.second.second.second)}; 833 else 834 { 835 static_assert(sizeof(_R) == sizeof(__x)); 836 _R __r; 837 __builtin_memcpy(__r._M_as_charptr(), __x._M_as_charptr(), 838 sizeof(_Tp) * _R::_S_size()); 839 return __r; 840 } 841 } 842 843 // __for_each(const _SimdTuple &, Fun) {{{1 844 template
845 _GLIBCXX_SIMD_INTRINSIC constexpr void 846 __for_each(const _SimdTuple<_Tp, _A0>& __t, _Fp&& __fun) 847 { static_cast<_Fp&&>(__fun)(__make_meta<_Offset>(__t), __t.first); } 848 849 template
851 _GLIBCXX_SIMD_INTRINSIC constexpr void 852 __for_each(const _SimdTuple<_Tp, _A0, _A1, _As...>& __t, _Fp&& __fun) 853 { 854 __fun(__make_meta<_Offset>(__t), __t.first); 855 __for_each<_Offset + simd_size<_Tp, _A0>::value>(__t.second, 856 static_cast<_Fp&&>(__fun)); 857 } 858 859 // __for_each(_SimdTuple &, Fun) {{{1 860 template
861 _GLIBCXX_SIMD_INTRINSIC constexpr void 862 __for_each(_SimdTuple<_Tp, _A0>& __t, _Fp&& __fun) 863 { static_cast<_Fp&&>(__fun)(__make_meta<_Offset>(__t), __t.first); } 864 865 template
867 _GLIBCXX_SIMD_INTRINSIC constexpr void 868 __for_each(_SimdTuple<_Tp, _A0, _A1, _As...>& __t, _Fp&& __fun) 869 { 870 __fun(__make_meta<_Offset>(__t), __t.first); 871 __for_each<_Offset + simd_size<_Tp, _A0>::value>(__t.second, 872 static_cast<_Fp&&>(__fun)); 873 } 874 875 // __for_each(_SimdTuple &, const _SimdTuple &, Fun) {{{1 876 template
877 _GLIBCXX_SIMD_INTRINSIC constexpr void 878 __for_each(_SimdTuple<_Tp, _A0>& __a, const _SimdTuple<_Tp, _A0>& __b, _Fp&& __fun) 879 { static_cast<_Fp&&>(__fun)(__make_meta<_Offset>(__a), __a.first, __b.first); } 880 881 template
883 _GLIBCXX_SIMD_INTRINSIC constexpr void 884 __for_each(_SimdTuple<_Tp, _A0, _A1, _As...>& __a, 885 const _SimdTuple<_Tp, _A0, _A1, _As...>& __b, _Fp&& __fun) 886 { 887 __fun(__make_meta<_Offset>(__a), __a.first, __b.first); 888 __for_each<_Offset + simd_size<_Tp, _A0>::value>(__a.second, __b.second, 889 static_cast<_Fp&&>(__fun)); 890 } 891 892 // __for_each(const _SimdTuple &, const _SimdTuple &, Fun) {{{1 893 template
894 _GLIBCXX_SIMD_INTRINSIC constexpr void 895 __for_each(const _SimdTuple<_Tp, _A0>& __a, const _SimdTuple<_Tp, _A0>& __b, _Fp&& __fun) 896 { static_cast<_Fp&&>(__fun)(__make_meta<_Offset>(__a), __a.first, __b.first); } 897 898 template
900 _GLIBCXX_SIMD_INTRINSIC constexpr void 901 __for_each(const _SimdTuple<_Tp, _A0, _A1, _As...>& __a, 902 const _SimdTuple<_Tp, _A0, _A1, _As...>& __b, _Fp&& __fun) 903 { 904 __fun(__make_meta<_Offset>(__a), __a.first, __b.first); 905 __for_each<_Offset + simd_size<_Tp, _A0>::value>(__a.second, __b.second, 906 static_cast<_Fp&&>(__fun)); 907 } 908 909 // }}}1 910 // __extract_part(_SimdTuple) {{{ 911 template
912 _GLIBCXX_SIMD_INTRINSIC constexpr auto // __vector_type_t or _SimdTuple 913 __extract_part(const _SimdTuple<_Tp, _A0, _As...>& __x) 914 { 915 // worst cases: 916 // (a) 4, 4, 4 => 3, 3, 3, 3 (_Total = 4) 917 // (b) 2, 2, 2 => 3, 3 (_Total = 2) 918 // (c) 4, 2 => 2, 2, 2 (_Total = 3) 919 using _Tuple = _SimdTuple<_Tp, _A0, _As...>; 920 static_assert(_Index + _Combine <= _Total && _Index >= 0 && _Total >= 1); 921 constexpr size_t _Np = _Tuple::_S_size(); 922 static_assert(_Np >= _Total && _Np % _Total == 0); 923 constexpr size_t __values_per_part = _Np / _Total; 924 [[maybe_unused]] constexpr size_t __values_to_skip 925 = _Index * __values_per_part; 926 constexpr size_t __return_size = __values_per_part * _Combine; 927 using _RetAbi = simd_abi::deduce_t<_Tp, __return_size>; 928 929 // handle (optimize) the simple cases 930 if constexpr (__return_size == 1) 931 return __x[integral_constant
()]; 932 else if constexpr (_Index == 0 && _Tuple::_S_first_size == __return_size) 933 return __x.first._M_data; 934 else if constexpr (_Index == 0 && _Total == _Combine) 935 return __x; 936 else if constexpr (_Index == 0 && _Tuple::_S_first_size >= __return_size) 937 return __intrin_bitcast<__vector_type_t<_Tp, __return_size>>( 938 __as_vector(__x.first)); 939 940 // recurse to skip unused data members at the beginning of _SimdTuple 941 else if constexpr (__values_to_skip >= _Tuple::_S_first_size) 942 { // recurse 943 if constexpr (_Tuple::_S_first_size % __values_per_part == 0) 944 { 945 constexpr int __parts_in_first 946 = _Tuple::_S_first_size / __values_per_part; 947 return __extract_part<_Index - __parts_in_first, 948 _Total - __parts_in_first, _Combine>( 949 __x.second); 950 } 951 else 952 return __extract_part<__values_to_skip - _Tuple::_S_first_size, 953 _Np - _Tuple::_S_first_size, __return_size>( 954 __x.second); 955 } 956 957 // extract from multiple _SimdTuple data members 958 else if constexpr (__return_size > _Tuple::_S_first_size - __values_to_skip) 959 { 960 #ifdef _GLIBCXX_SIMD_USE_ALIASING_LOADS 961 const __may_alias<_Tp>* const element_ptr 962 = reinterpret_cast
*>(&__x) + __values_to_skip; 963 return __as_vector(simd<_Tp, _RetAbi>(element_ptr, element_aligned)); 964 #else 965 [[maybe_unused]] constexpr size_t __offset = __values_to_skip; 966 return __as_vector(simd<_Tp, _RetAbi>( 967 [&](auto __i) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA { 968 constexpr _SizeConstant<__i + __offset> __k; 969 return __x[__k]; 970 })); 971 #endif 972 } 973 974 // all of the return values are in __x.first 975 else if constexpr (_Tuple::_S_first_size % __values_per_part == 0) 976 return __extract_part<_Index, _Tuple::_S_first_size / __values_per_part, 977 _Combine>(__x.first); 978 else 979 return __extract_part<__values_to_skip, _Tuple::_S_first_size, 980 _Combine * __values_per_part>(__x.first); 981 } 982 983 // }}} 984 // __fixed_size_storage_t<_Tp, _Np>{{{ 985 template
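
__extract_part returns _Combine out of _Total equal slices of the stored tuple; its user-facing counterparts are split() and concat() from the Parallelism TS. A usage sketch, assuming GCC's <experimental/simd>:

    #include <experimental/simd>
    #include <cstdio>

    namespace stdx = std::experimental;

    int main()
    {
      stdx::fixed_size_simd<int, 6> v([](int i) { return 10 * i; });

      // split<2, 2, 2>() returns a tuple of three 2-element simds; internally
      // each piece is sliced out of the stored chunks by __extract_part-style
      // logic.
      auto [a, b, c] = stdx::split<2, 2, 2>(v);

      // concat() is the inverse; the result is again a 6-element simd.
      auto w = stdx::concat(a, b, c);

      std::printf("b[1] = %d, w[5] = %d\n", int(b[1]), int(w[5]));   // 30, 50
    }
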
>, 987 int _Remain = _Np - int(_Next::size())> 988 struct __fixed_size_storage_builder; 989 990 template
991 struct __fixed_size_storage 992 : public __fixed_size_storage_builder<_Tp, _Np, _SimdTuple<_Tp>> {}; 993 994 template
995 struct __fixed_size_storage_builder<_Tp, _Np, _SimdTuple<_Tp, _As...>, _Next, 996 0> 997 { using type = _SimdTuple<_Tp, _As..., typename _Next::abi_type>; }; 998 999 template
1000 struct __fixed_size_storage_builder<_Tp, _Np, _SimdTuple<_Tp, _As...>, _Next, 1001 _Remain> 1002 { 1003 using type = typename __fixed_size_storage_builder< 1004 _Tp, _Remain, _SimdTuple<_Tp, _As..., typename _Next::abi_type>>::type; 1005 }; 1006 1007 // }}} 1008 // __autocvt_to_simd {{{ 1009 template
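
__fixed_size_storage_builder above chooses the storage for _Np elements by repeatedly taking the widest ABI that still fits and recursing on the remainder. The following constexpr sketch mimics that greedy decomposition with an illustrative candidate list (8/4/2/1, roughly an AVX-class target for 32-bit elements); the real candidate widths depend on the target's native ABIs:

    #include <array>
    #include <cstdio>

    // Greedily chop n elements into chunks, always taking the widest
    // candidate width that still fits.
    constexpr std::array<int, 8> decompose(int n)
    {
      constexpr int candidates[] = {8, 4, 2, 1};
      std::array<int, 8> chunks{};
      int used = 0;
      while (n > 0)
        for (int c : candidates)
          if (c <= n)
            {
              chunks[used++] = c;
              n -= c;
              break;
            }
      return chunks;
    }

    int main()
    {
      constexpr auto c = decompose(13);   // 13 elements -> 8 + 4 + 1
      static_assert(c[0] == 8 && c[1] == 4 && c[2] == 1 && c[3] == 0);
      std::printf("%d %d %d\n", c[0], c[1], c[2]);
    }
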
>> 1010 struct __autocvt_to_simd 1011 { 1012 _Tp _M_data; 1013 using _TT = __remove_cvref_t<_Tp>; 1014 1015 _GLIBCXX_SIMD_INTRINSIC constexpr 1016 operator _TT() 1017 { return _M_data; } 1018 1019 _GLIBCXX_SIMD_INTRINSIC constexpr 1020 operator _TT&() 1021 { 1022 static_assert(is_lvalue_reference<_Tp>::value, ""); 1023 static_assert(!is_const<_Tp>::value, ""); 1024 return _M_data; 1025 } 1026 1027 _GLIBCXX_SIMD_INTRINSIC constexpr 1028 operator _TT*() 1029 { 1030 static_assert(is_lvalue_reference<_Tp>::value, ""); 1031 static_assert(!is_const<_Tp>::value, ""); 1032 return &_M_data; 1033 } 1034 1035 _GLIBCXX_SIMD_INTRINSIC constexpr 1036 __autocvt_to_simd(_Tp dd) : _M_data(dd) {} 1037 1038 template
1039 _GLIBCXX_SIMD_INTRINSIC constexpr 1040 operator simd
() 1041 { return {__private_init, _M_data}; } 1042 1043 template
1044 _GLIBCXX_SIMD_INTRINSIC constexpr 1045 operator simd
&() 1046 { return *reinterpret_cast
*>(&_M_data); } 1047 1048 template
1049 _GLIBCXX_SIMD_INTRINSIC constexpr 1050 operator simd
*() 1051 { return reinterpret_cast
*>(&_M_data); } 1052 }; 1053 1054 template
1055 __autocvt_to_simd(_Tp &&) -> __autocvt_to_simd<_Tp>; 1056 1057 template
1058 struct __autocvt_to_simd<_Tp, true> 1059 { 1060 using _TT = __remove_cvref_t<_Tp>; 1061 _Tp _M_data; 1062 fixed_size_simd<_TT, 1> _M_fd; 1063 1064 _GLIBCXX_SIMD_INTRINSIC 1065 constexpr __autocvt_to_simd(_Tp dd) : _M_data(dd), _M_fd(_M_data) {} 1066 1067 _GLIBCXX_SIMD_INTRINSIC 1068 ~__autocvt_to_simd() 1069 { _M_data = __data(_M_fd).first; } 1070 1071 _GLIBCXX_SIMD_INTRINSIC constexpr 1072 operator fixed_size_simd<_TT, 1>() 1073 { return _M_fd; } 1074 1075 _GLIBCXX_SIMD_INTRINSIC constexpr 1076 operator fixed_size_simd<_TT, 1> &() 1077 { 1078 static_assert(is_lvalue_reference<_Tp>::value, ""); 1079 static_assert(!is_const<_Tp>::value, ""); 1080 return _M_fd; 1081 } 1082 1083 _GLIBCXX_SIMD_INTRINSIC constexpr 1084 operator fixed_size_simd<_TT, 1> *() 1085 { 1086 static_assert(is_lvalue_reference<_Tp>::value, ""); 1087 static_assert(!is_const<_Tp>::value, ""); 1088 return &_M_fd; 1089 } 1090 }; 1091 1092 // }}} 1093 1094 struct _CommonImplFixedSize; 1095 template
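
__autocvt_to_simd wraps a chunk (or, in the <_Tp, true> specialization just above, a single scalar) so that it can be handed to simd math functions that expect a simd lvalue or pointer for an output parameter. The user-visible case is something like frexp's exponent output; a sketch, assuming GCC's <experimental/simd>:

    #include <experimental/simd>
    #include <cstdio>

    namespace stdx = std::experimental;

    int main()
    {
      stdx::fixed_size_simd<float, 5> x([](int i) { return 3.0f * (i + 1); });

      // The exponent output is a fixed_size_simd<int, 5>; inside the library
      // each chunk of it reaches the native frexp through an
      // __autocvt_to_simd wrapper.
      stdx::fixed_size_simd<int, 5> exp;
      auto mant = stdx::frexp(x, &exp);

      std::printf("x[3] = %g = %g * 2^%d\n",
                  float(x[3]), float(mant[3]), int(exp[3]));
    }
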
struct _SimdImplFixedSize; 1096 template
struct _MaskImplFixedSize; 1097 // simd_abi::_Fixed {{{ 1098 template
1099 struct simd_abi::_Fixed 1100 { 1101 template
static constexpr size_t _S_size = _Np; 1102 template
static constexpr size_t _S_full_size = _Np; 1103 // validity traits {{{ 1104 struct _IsValidAbiTag : public __bool_constant<(_Np > 0)> {}; 1105 1106 template
1107 struct _IsValidSizeFor 1108 : __bool_constant<(_Np <= simd_abi::max_fixed_size<_Tp>)> {}; 1109 1110 template
1111 struct _IsValid : conjunction<_IsValidAbiTag, __is_vectorizable<_Tp>, 1112 _IsValidSizeFor<_Tp>> {}; 1113 1114 template
1115 static constexpr bool _S_is_valid_v = _IsValid<_Tp>::value; 1116 1117 // }}} 1118 // _S_masked {{{ 1119 _GLIBCXX_SIMD_INTRINSIC static constexpr _SanitizedBitMask<_Np> 1120 _S_masked(_BitMask<_Np> __x) 1121 { return __x._M_sanitized(); } 1122 1123 _GLIBCXX_SIMD_INTRINSIC static constexpr _SanitizedBitMask<_Np> 1124 _S_masked(_SanitizedBitMask<_Np> __x) 1125 { return __x; } 1126 1127 // }}} 1128 // _*Impl {{{ 1129 using _CommonImpl = _CommonImplFixedSize; 1130 using _SimdImpl = _SimdImplFixedSize<_Np>; 1131 using _MaskImpl = _MaskImplFixedSize<_Np>; 1132 1133 // }}} 1134 // __traits {{{ 1135 template
> 1136 struct __traits : _InvalidTraits {}; 1137 1138 template
1139 struct __traits<_Tp, true> 1140 { 1141 using _IsValid = true_type; 1142 using _SimdImpl = _SimdImplFixedSize<_Np>; 1143 using _MaskImpl = _MaskImplFixedSize<_Np>; 1144 1145 // simd and simd_mask member types {{{ 1146 using _SimdMember = __fixed_size_storage_t<_Tp, _Np>; 1147 using _MaskMember = _SanitizedBitMask<_Np>; 1148 1149 static constexpr size_t _S_simd_align 1150 = std::__bit_ceil(_Np * sizeof(_Tp)); 1151 1152 static constexpr size_t _S_mask_align = alignof(_MaskMember); 1153 1154 // }}} 1155 // _SimdBase / base class for simd, providing extra conversions {{{ 1156 struct _SimdBase 1157 { 1158 // The following ensures, function arguments are passed via the stack. 1159 // This is important for ABI compatibility across TU boundaries 1160 _GLIBCXX_SIMD_ALWAYS_INLINE constexpr 1161 _SimdBase(const _SimdBase&) {} 1162 1163 _SimdBase() = default; 1164 1165 _GLIBCXX_SIMD_ALWAYS_INLINE constexpr explicit 1166 operator const _SimdMember &() const 1167 { return static_cast
*>(this)->_M_data; } 1168 1169 _GLIBCXX_SIMD_ALWAYS_INLINE constexpr explicit 1170 operator array<_Tp, _Np>() const 1171 { 1172 array<_Tp, _Np> __r; 1173 // _SimdMember can be larger because of higher alignment 1174 static_assert(sizeof(__r) <= sizeof(_SimdMember), ""); 1175 __builtin_memcpy(__r.data(), &static_cast
(*this), 1176 sizeof(__r)); 1177 return __r; 1178 } 1179 }; 1180 1181 // }}} 1182 // _MaskBase {{{ 1183 // empty. The bitset interface suffices 1184 struct _MaskBase {}; 1185 1186 // }}} 1187 // _SimdCastType {{{ 1188 struct _SimdCastType 1189 { 1190 _GLIBCXX_SIMD_ALWAYS_INLINE constexpr 1191 _SimdCastType(const array<_Tp, _Np>&); 1192 1193 _GLIBCXX_SIMD_ALWAYS_INLINE constexpr 1194 _SimdCastType(const _SimdMember& dd) : _M_data(dd) {} 1195 1196 _GLIBCXX_SIMD_ALWAYS_INLINE constexpr explicit 1197 operator const _SimdMember &() const { return _M_data; } 1198 1199 private: 1200 const _SimdMember& _M_data; 1201 }; 1202 1203 // }}} 1204 // _MaskCastType {{{ 1205 class _MaskCastType 1206 { 1207 _MaskCastType() = delete; 1208 }; 1209 // }}} 1210 }; 1211 // }}} 1212 }; 1213 1214 // }}} 1215 // _CommonImplFixedSize {{{ 1216 struct _CommonImplFixedSize 1217 { 1218 // _S_store {{{ 1219 template
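
The _SimdBase defined above injects an explicit conversion from a fixed_size simd to array<_Tp, _Np>, implemented as a memcpy of the element bytes. The following sketch relies on that conversion; note it is a convenience of this implementation rather than something the Parallelism TS promises:

    #include <experimental/simd>
    #include <array>
    #include <cstdio>

    namespace stdx = std::experimental;

    int main()
    {
      stdx::fixed_size_simd<float, 4> v([](int i) { return i + 0.5f; });

      // Uses the explicit operator array<_Tp, _Np>() from _SimdBase above.
      auto a = static_cast<std::array<float, 4>>(v);

      std::printf("a = {%g, %g, %g, %g}\n", a[0], a[1], a[2], a[3]);
    }
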
1220 _GLIBCXX_SIMD_INTRINSIC static void 1221 _S_store(const _SimdTuple<_Tp, _As...>& __x, void* __addr) 1222 { 1223 constexpr size_t _Np = _SimdTuple<_Tp, _As...>::_S_size(); 1224 __builtin_memcpy(__addr, &__x, _Np * sizeof(_Tp)); 1225 } 1226 1227 // }}} 1228 }; 1229 1230 // }}} 1231 // _SimdImplFixedSize {{{1 1232 // fixed_size should not inherit from _SimdMathFallback in order for 1233 // specializations in the used _SimdTuple Abis to get used 1234 template
1235 struct _SimdImplFixedSize 1236 { 1237 // member types {{{2 1238 using _MaskMember = _SanitizedBitMask<_Np>; 1239 1240 template
1241 using _SimdMember = __fixed_size_storage_t<_Tp, _Np>; 1242 1243 template
1244 static constexpr size_t _S_tuple_size = _SimdMember<_Tp>::_S_tuple_size; 1245 1246 template
1247 using _Simd = simd<_Tp, simd_abi::fixed_size<_Np>>; 1248 1249 template
1250 using _TypeTag = _Tp*; 1251 1252 // broadcast {{{2 1253 template
1254 _GLIBCXX_SIMD_INTRINSIC static constexpr _SimdMember<_Tp> 1255 _S_broadcast(_Tp __x) noexcept 1256 { 1257 return _SimdMember<_Tp>::_S_generate( 1258 [&](auto __meta) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA { 1259 return __meta._S_broadcast(__x); 1260 }); 1261 } 1262 1263 // _S_generator {{{2 1264 template
1265 _GLIBCXX_SIMD_INTRINSIC static constexpr _SimdMember<_Tp> 1266 _S_generator(_Fp&& __gen, _TypeTag<_Tp>) 1267 { 1268 return _SimdMember<_Tp>::_S_generate( 1269 [&__gen](auto __meta) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA { 1270 return __meta._S_generator( 1271 [&](auto __i) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA { 1272 return __i < _Np ? __gen(_SizeConstant<__meta._S_offset + __i>()) 1273 : 0; 1274 }, 1275 _TypeTag<_Tp>()); 1276 }); 1277 } 1278 1279 // _S_load {{{2 1280 template
1281 _GLIBCXX_SIMD_INTRINSIC static constexpr _SimdMember<_Tp> 1282 _S_load(const _Up* __mem, _TypeTag<_Tp>) noexcept 1283 { 1284 return _SimdMember<_Tp>::_S_generate( 1285 [&](auto __meta) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA { 1286 return __meta._S_load(&__mem[__meta._S_offset], _TypeTag<_Tp>()); 1287 }); 1288 } 1289 1290 // _S_masked_load {{{2 1291 template
1292 _GLIBCXX_SIMD_INTRINSIC static _SimdTuple<_Tp, _As...> 1293 _S_masked_load(const _SimdTuple<_Tp, _As...>& __old, 1294 const _MaskMember __bits, const _Up* __mem) noexcept 1295 { 1296 auto __merge = __old; 1297 __for_each(__merge, [&](auto __meta, auto& __native) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA { 1298 if (__meta._S_submask(__bits).any()) 1299 #pragma GCC diagnostic push 1300 // Dereferencing __mem + __meta._S_offset could be UB ([expr.add]/4.3). 1301 // It is the responsibility of the caller of the masked load (via the mask's value) to 1302 // avoid UB. Consequently, the compiler may assume this branch is unreachable, if the 1303 // pointer arithmetic is UB. 1304 #pragma GCC diagnostic ignored "-Warray-bounds" 1305 __native 1306 = __meta._S_masked_load(__native, __meta._S_make_mask(__bits), 1307 __mem + __meta._S_offset); 1308 #pragma GCC diagnostic pop 1309 }); 1310 return __merge; 1311 } 1312 1313 // _S_store {{{2 1314 template
1315 _GLIBCXX_SIMD_INTRINSIC static constexpr void 1316 _S_store(const _SimdMember<_Tp>& __v, _Up* __mem, _TypeTag<_Tp>) noexcept 1317 { 1318 __for_each(__v, [&](auto __meta, auto __native) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA { 1319 __meta._S_store(__native, &__mem[__meta._S_offset], _TypeTag<_Tp>()); 1320 }); 1321 } 1322 1323 // _S_masked_store {{{2 1324 template
1325 _GLIBCXX_SIMD_INTRINSIC static void 1326 _S_masked_store(const _SimdTuple<_Tp, _As...>& __v, _Up* __mem, 1327 const _MaskMember __bits) noexcept 1328 { 1329 __for_each(__v, [&](auto __meta, auto __native) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA { 1330 if (__meta._S_submask(__bits).any()) 1331 #pragma GCC diagnostic push 1332 // __mem + __mem._S_offset could be UB ([expr.add]/4.3, but it punts 1333 // the responsibility for avoiding UB to the caller of the masked 1334 // store via the mask. Consequently, the compiler may assume this 1335 // branch is unreachable, if the pointer arithmetic is UB. 1336 #pragma GCC diagnostic ignored "-Warray-bounds" 1337 __meta._S_masked_store(__native, __mem + __meta._S_offset, 1338 __meta._S_make_mask(__bits)); 1339 #pragma GCC diagnostic pop 1340 }); 1341 } 1342 1343 // negation {{{2 1344 template
1345 static constexpr inline _MaskMember 1346 _S_negate(const _SimdTuple<_Tp, _As...>& __x) noexcept 1347 { 1348 _MaskMember __bits = 0; 1349 __for_each( 1350 __x, [&__bits](auto __meta, auto __native) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA { 1351 __bits 1352 |= __meta._S_mask_to_shifted_ullong(__meta._S_negate(__native)); 1353 }); 1354 return __bits; 1355 } 1356 1357 // reductions {{{2 1358 template
1359 static constexpr inline _Tp _S_reduce(const _Simd<_Tp>& __x, 1360 const _BinaryOperation& __binary_op) 1361 { 1362 using _Tup = _SimdMember<_Tp>; 1363 const _Tup& __tup = __data(__x); 1364 if constexpr (_Tup::_S_tuple_size == 1) 1365 return _Tup::_FirstAbi::_SimdImpl::_S_reduce( 1366 __tup.template _M_simd_at<0>(), __binary_op); 1367 else if constexpr (_Tup::_S_tuple_size == 2 && _Tup::_S_size() > 2 1368 && _Tup::_SecondType::_S_size() == 1) 1369 { 1370 return __binary_op(simd<_Tp, simd_abi::scalar>( 1371 reduce(__tup.template _M_simd_at<0>(), 1372 __binary_op)), 1373 __tup.template _M_simd_at<1>())[0]; 1374 } 1375 else if constexpr (_Tup::_S_tuple_size == 2 && _Tup::_S_size() > 4 1376 && _Tup::_SecondType::_S_size() == 2) 1377 { 1378 return __binary_op( 1379 simd<_Tp, simd_abi::scalar>( 1380 reduce(__tup.template _M_simd_at<0>(), __binary_op)), 1381 simd<_Tp, simd_abi::scalar>( 1382 reduce(__tup.template _M_simd_at<1>(), __binary_op)))[0]; 1383 } 1384 else 1385 { 1386 const auto& __x2 = __call_with_n_evaluations< 1387 __div_roundup(_Tup::_S_tuple_size, 2)>( 1388 [](auto __first_simd, auto... __remaining) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA { 1389 if constexpr (sizeof...(__remaining) == 0) 1390 return __first_simd; 1391 else 1392 { 1393 using _Tup2 1394 = _SimdTuple<_Tp, 1395 typename decltype(__first_simd)::abi_type, 1396 typename decltype(__remaining)::abi_type...>; 1397 return fixed_size_simd<_Tp, _Tup2::_S_size()>( 1398 __private_init, 1399 __make_simd_tuple(__first_simd, __remaining...)); 1400 } 1401 }, 1402 [&](auto __i) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA { 1403 auto __left = __tup.template _M_simd_at<2 * __i>(); 1404 if constexpr (2 * __i + 1 == _Tup::_S_tuple_size) 1405 return __left; 1406 else 1407 { 1408 auto __right = __tup.template _M_simd_at<2 * __i + 1>(); 1409 using _LT = decltype(__left); 1410 using _RT = decltype(__right); 1411 if constexpr (_LT::size() == _RT::size()) 1412 return __binary_op(__left, __right); 1413 else 1414 { 1415 _GLIBCXX_SIMD_USE_CONSTEXPR_API 1416 typename _LT::mask_type __k( 1417 __private_init, 1418 [](auto __j) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA { 1419 return __j < _RT::size(); 1420 }); 1421 _LT __ext_right = __left; 1422 where(__k, __ext_right) 1423 = __proposed::resizing_simd_cast<_LT>(__right); 1424 where(__k, __left) = __binary_op(__left, __ext_right); 1425 return __left; 1426 } 1427 } 1428 }); 1429 return reduce(__x2, __binary_op); 1430 } 1431 } 1432 1433 // _S_min, _S_max {{{2 1434 template
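
_S_reduce above first reduces within each native chunk and then folds the per-chunk results, with special cases for short scalar tails. At the user level this is plain reduce(); a sketch, assuming GCC's <experimental/simd>:

    #include <experimental/simd>
    #include <cstdio>
    #include <functional>

    namespace stdx = std::experimental;

    int main()
    {
      stdx::fixed_size_simd<int, 7> v([](int i) { return i + 1; });   // 1..7

      // For 7 ints this typically means one wide chunk plus a short tail,
      // handled by the scalar special cases in _S_reduce.
      int sum = stdx::reduce(v, std::plus<>());
      std::printf("sum = %d\n", sum);   // 28
    }
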
1435 _GLIBCXX_SIMD_INTRINSIC static constexpr _SimdTuple<_Tp, _As...> 1436 _S_min(const _SimdTuple<_Tp, _As...>& __a, const _SimdTuple<_Tp, _As...>& __b) 1437 { 1438 return __a._M_apply_per_chunk( 1439 [](auto __impl, auto __aa, auto __bb) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA { 1440 return __impl._S_min(__aa, __bb); 1441 }, 1442 __b); 1443 } 1444 1445 template
1446 _GLIBCXX_SIMD_INTRINSIC static constexpr _SimdTuple<_Tp, _As...> 1447 _S_max(const _SimdTuple<_Tp, _As...>& __a, const _SimdTuple<_Tp, _As...>& __b) 1448 { 1449 return __a._M_apply_per_chunk( 1450 [](auto __impl, auto __aa, auto __bb) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA { 1451 return __impl._S_max(__aa, __bb); 1452 }, 1453 __b); 1454 } 1455 1456 // _S_complement {{{2 1457 template
1458 _GLIBCXX_SIMD_INTRINSIC static constexpr _SimdTuple<_Tp, _As...> 1459 _S_complement(const _SimdTuple<_Tp, _As...>& __x) noexcept 1460 { 1461 return __x._M_apply_per_chunk( 1462 [](auto __impl, auto __xx) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA { 1463 return __impl._S_complement(__xx); 1464 }); 1465 } 1466 1467 // _S_unary_minus {{{2 1468 template
1469 _GLIBCXX_SIMD_INTRINSIC static constexpr _SimdTuple<_Tp, _As...> 1470 _S_unary_minus(const _SimdTuple<_Tp, _As...>& __x) noexcept 1471 { 1472 return __x._M_apply_per_chunk( 1473 [](auto __impl, auto __xx) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA { 1474 return __impl._S_unary_minus(__xx); 1475 }); 1476 } 1477 1478 // arithmetic operators {{{2 1479 1480 #define _GLIBCXX_SIMD_FIXED_OP(name_, op_) \ 1481 template
\ 1482 _GLIBCXX_SIMD_INTRINSIC static constexpr _SimdTuple<_Tp, _As...> name_( \ 1483 const _SimdTuple<_Tp, _As...>& __x, const _SimdTuple<_Tp, _As...>& __y) \ 1484 { \ 1485 return __x._M_apply_per_chunk( \ 1486 [](auto __impl, auto __xx, auto __yy) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA { \ 1487 return __impl.name_(__xx, __yy); \ 1488 }, \ 1489 __y); \ 1490 } 1491 1492 _GLIBCXX_SIMD_FIXED_OP(_S_plus, +) 1493 _GLIBCXX_SIMD_FIXED_OP(_S_minus, -) 1494 _GLIBCXX_SIMD_FIXED_OP(_S_multiplies, *) 1495 _GLIBCXX_SIMD_FIXED_OP(_S_divides, /) 1496 _GLIBCXX_SIMD_FIXED_OP(_S_modulus, %) 1497 _GLIBCXX_SIMD_FIXED_OP(_S_bit_and, &) 1498 _GLIBCXX_SIMD_FIXED_OP(_S_bit_or, |) 1499 _GLIBCXX_SIMD_FIXED_OP(_S_bit_xor, ^) 1500 _GLIBCXX_SIMD_FIXED_OP(_S_bit_shift_left, <<) 1501 _GLIBCXX_SIMD_FIXED_OP(_S_bit_shift_right, >>) 1502 #undef _GLIBCXX_SIMD_FIXED_OP 1503 1504 template
1505 _GLIBCXX_SIMD_INTRINSIC static constexpr _SimdTuple<_Tp, _As...> 1506 _S_bit_shift_left(const _SimdTuple<_Tp, _As...>& __x, int __y) 1507 { 1508 return __x._M_apply_per_chunk( 1509 [__y](auto __impl, auto __xx) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA { 1510 return __impl._S_bit_shift_left(__xx, __y); 1511 }); 1512 } 1513 1514 template
1515 _GLIBCXX_SIMD_INTRINSIC static constexpr _SimdTuple<_Tp, _As...> 1516 _S_bit_shift_right(const _SimdTuple<_Tp, _As...>& __x, int __y) 1517 { 1518 return __x._M_apply_per_chunk( 1519 [__y](auto __impl, auto __xx) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA { 1520 return __impl._S_bit_shift_right(__xx, __y); 1521 }); 1522 } 1523 1524 // math {{{2 1525 #define _GLIBCXX_SIMD_APPLY_ON_TUPLE(_RetTp, __name) \ 1526 template
\ 1527 static inline __fixed_size_storage_t<_RetTp, _Np> \ 1528 _S_##__name(const _SimdTuple<_Tp, _As...>& __x, \ 1529 const _More&... __more) \ 1530 { \ 1531 if constexpr (sizeof...(_More) == 0) \ 1532 { \ 1533 if constexpr (is_same_v<_Tp, _RetTp>) \ 1534 return __x._M_apply_per_chunk( \ 1535 [](auto __impl, auto __xx) \ 1536 constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA \ 1537 { \ 1538 using _V = typename decltype(__impl)::simd_type; \ 1539 return __data(__name(_V(__private_init, __xx))); \ 1540 }); \ 1541 else \ 1542 return __optimize_simd_tuple( \ 1543 __x.template _M_apply_r<_RetTp>( \ 1544 [](auto __impl, auto __xx) \ 1545 _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA \ 1546 { return __impl._S_##__name(__xx); })); \ 1547 } \ 1548 else if constexpr ( \ 1549 is_same_v< \ 1550 _Tp, \ 1551 _RetTp> && (... && is_same_v<_SimdTuple<_Tp, _As...>, _More>) ) \ 1552 return __x._M_apply_per_chunk( \ 1553 [](auto __impl, auto __xx, auto... __pack) \ 1554 constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA \ 1555 { \ 1556 using _V = typename decltype(__impl)::simd_type; \ 1557 return __data(__name(_V(__private_init, __xx), \ 1558 _V(__private_init, __pack)...)); \ 1559 }, __more...); \ 1560 else if constexpr (is_same_v<_Tp, _RetTp>) \ 1561 return __x._M_apply_per_chunk( \ 1562 [](auto __impl, auto __xx, auto... __pack) \ 1563 constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA \ 1564 { \ 1565 using _V = typename decltype(__impl)::simd_type; \ 1566 return __data(__name(_V(__private_init, __xx), \ 1567 __autocvt_to_simd(__pack)...)); \ 1568 }, __more...); \ 1569 else \ 1570 __assert_unreachable<_Tp>(); \ 1571 } 1572 1573 _GLIBCXX_SIMD_APPLY_ON_TUPLE(_Tp, acos) 1574 _GLIBCXX_SIMD_APPLY_ON_TUPLE(_Tp, asin) 1575 _GLIBCXX_SIMD_APPLY_ON_TUPLE(_Tp, atan) 1576 _GLIBCXX_SIMD_APPLY_ON_TUPLE(_Tp, atan2) 1577 _GLIBCXX_SIMD_APPLY_ON_TUPLE(_Tp, cos) 1578 _GLIBCXX_SIMD_APPLY_ON_TUPLE(_Tp, sin) 1579 _GLIBCXX_SIMD_APPLY_ON_TUPLE(_Tp, tan) 1580 _GLIBCXX_SIMD_APPLY_ON_TUPLE(_Tp, acosh) 1581 _GLIBCXX_SIMD_APPLY_ON_TUPLE(_Tp, asinh) 1582 _GLIBCXX_SIMD_APPLY_ON_TUPLE(_Tp, atanh) 1583 _GLIBCXX_SIMD_APPLY_ON_TUPLE(_Tp, cosh) 1584 _GLIBCXX_SIMD_APPLY_ON_TUPLE(_Tp, sinh) 1585 _GLIBCXX_SIMD_APPLY_ON_TUPLE(_Tp, tanh) 1586 _GLIBCXX_SIMD_APPLY_ON_TUPLE(_Tp, exp) 1587 _GLIBCXX_SIMD_APPLY_ON_TUPLE(_Tp, exp2) 1588 _GLIBCXX_SIMD_APPLY_ON_TUPLE(_Tp, expm1) 1589 _GLIBCXX_SIMD_APPLY_ON_TUPLE(int, ilogb) 1590 _GLIBCXX_SIMD_APPLY_ON_TUPLE(_Tp, log) 1591 _GLIBCXX_SIMD_APPLY_ON_TUPLE(_Tp, log10) 1592 _GLIBCXX_SIMD_APPLY_ON_TUPLE(_Tp, log1p) 1593 _GLIBCXX_SIMD_APPLY_ON_TUPLE(_Tp, log2) 1594 _GLIBCXX_SIMD_APPLY_ON_TUPLE(_Tp, logb) 1595 // modf implemented in simd_math.h 1596 _GLIBCXX_SIMD_APPLY_ON_TUPLE(_Tp, 1597 scalbn) // double scalbn(double x, int exp); 1598 _GLIBCXX_SIMD_APPLY_ON_TUPLE(_Tp, scalbln) 1599 _GLIBCXX_SIMD_APPLY_ON_TUPLE(_Tp, cbrt) 1600 _GLIBCXX_SIMD_APPLY_ON_TUPLE(_Tp, abs) 1601 _GLIBCXX_SIMD_APPLY_ON_TUPLE(_Tp, fabs) 1602 _GLIBCXX_SIMD_APPLY_ON_TUPLE(_Tp, pow) 1603 _GLIBCXX_SIMD_APPLY_ON_TUPLE(_Tp, sqrt) 1604 _GLIBCXX_SIMD_APPLY_ON_TUPLE(_Tp, erf) 1605 _GLIBCXX_SIMD_APPLY_ON_TUPLE(_Tp, erfc) 1606 _GLIBCXX_SIMD_APPLY_ON_TUPLE(_Tp, lgamma) 1607 _GLIBCXX_SIMD_APPLY_ON_TUPLE(_Tp, tgamma) 1608 _GLIBCXX_SIMD_APPLY_ON_TUPLE(_Tp, trunc) 1609 _GLIBCXX_SIMD_APPLY_ON_TUPLE(_Tp, ceil) 1610 _GLIBCXX_SIMD_APPLY_ON_TUPLE(_Tp, floor) 1611 _GLIBCXX_SIMD_APPLY_ON_TUPLE(_Tp, nearbyint) 1612 1613 _GLIBCXX_SIMD_APPLY_ON_TUPLE(_Tp, rint) 1614 _GLIBCXX_SIMD_APPLY_ON_TUPLE(long, lrint) 1615 _GLIBCXX_SIMD_APPLY_ON_TUPLE(long long, llrint) 1616 1617 
_GLIBCXX_SIMD_APPLY_ON_TUPLE(_Tp, round) 1618 _GLIBCXX_SIMD_APPLY_ON_TUPLE(long, lround) 1619 _GLIBCXX_SIMD_APPLY_ON_TUPLE(long long, llround) 1620 1621 _GLIBCXX_SIMD_APPLY_ON_TUPLE(_Tp, ldexp) 1622 _GLIBCXX_SIMD_APPLY_ON_TUPLE(_Tp, fmod) 1623 _GLIBCXX_SIMD_APPLY_ON_TUPLE(_Tp, remainder) 1624 _GLIBCXX_SIMD_APPLY_ON_TUPLE(_Tp, copysign) 1625 _GLIBCXX_SIMD_APPLY_ON_TUPLE(_Tp, nextafter) 1626 _GLIBCXX_SIMD_APPLY_ON_TUPLE(_Tp, fdim) 1627 _GLIBCXX_SIMD_APPLY_ON_TUPLE(_Tp, fmax) 1628 _GLIBCXX_SIMD_APPLY_ON_TUPLE(_Tp, fmin) 1629 _GLIBCXX_SIMD_APPLY_ON_TUPLE(_Tp, fma) 1630 _GLIBCXX_SIMD_APPLY_ON_TUPLE(int, fpclassify) 1631 #undef _GLIBCXX_SIMD_APPLY_ON_TUPLE 1632 1633 template
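
The _GLIBCXX_SIMD_APPLY_ON_TUPLE macro above generates the fixed_size overloads of these <cmath> functions by forwarding each chunk to its own ABI's implementation. A usage sketch, assuming GCC's <experimental/simd>:

    #include <experimental/simd>
    #include <cstdio>

    namespace stdx = std::experimental;

    int main()
    {
      stdx::fixed_size_simd<double, 6> x([](int i) { return 0.25 * i; });

      // Both calls resolve to the fixed_size overloads generated above; each
      // native chunk is processed by that chunk ABI's implementation.
      auto s = stdx::sin(x);
      auto c = stdx::fmin(s, stdx::fixed_size_simd<double, 6>(0.5));

      std::printf("s[5] = %g, c[5] = %g\n", double(s[5]), double(c[5]));
    }
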
1634 static inline _SimdTuple<_Tp, _Abis...> 1635 _S_remquo(const _SimdTuple<_Tp, _Abis...>& __x, const _SimdTuple<_Tp, _Abis...>& __y, 1636 __fixed_size_storage_t
::_S_size()>* __z) 1637 { 1638 return __x._M_apply_per_chunk( 1639 [](auto __impl, const auto __xx, const auto __yy, auto& __zz) 1640 _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA 1641 { return __impl._S_remquo(__xx, __yy, &__zz); }, 1642 __y, *__z); 1643 } 1644 1645 template
1646 static inline _SimdTuple<_Tp, _As...> 1647 _S_frexp(const _SimdTuple<_Tp, _As...>& __x, 1648 __fixed_size_storage_t
& __exp) noexcept 1649 { 1650 return __x._M_apply_per_chunk( 1651 [](auto __impl, const auto& __a, auto& __b) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA { 1652 return __data(frexp(typename decltype(__impl)::simd_type(__private_init, __a), 1653 __autocvt_to_simd(__b))); 1654 }, __exp); 1655 } 1656 1657 #define _GLIBCXX_SIMD_TEST_ON_TUPLE_(name_) \ 1658 template
\ 1659 static inline _MaskMember \ 1660 _S_##name_(const _SimdTuple<_Tp, _As...>& __x) noexcept \ 1661 { \ 1662 return _M_test([] (auto __impl, auto __xx) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA { \ 1663 return __impl._S_##name_(__xx); \ 1664 }, __x); \ 1665 } 1666 1667 _GLIBCXX_SIMD_TEST_ON_TUPLE_(isinf) 1668 _GLIBCXX_SIMD_TEST_ON_TUPLE_(isfinite) 1669 _GLIBCXX_SIMD_TEST_ON_TUPLE_(isnan) 1670 _GLIBCXX_SIMD_TEST_ON_TUPLE_(isnormal) 1671 _GLIBCXX_SIMD_TEST_ON_TUPLE_(signbit) 1672 #undef _GLIBCXX_SIMD_TEST_ON_TUPLE_ 1673 1674 // _S_increment & _S_decrement{{{2 1675 template
1676 _GLIBCXX_SIMD_INTRINSIC static constexpr void 1677 _S_increment(_SimdTuple<_Ts...>& __x) 1678 { 1679 __for_each( 1680 __x, [](auto __meta, auto& native) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA { 1681 __meta._S_increment(native); 1682 }); 1683 } 1684 1685 template
1686 _GLIBCXX_SIMD_INTRINSIC static constexpr void 1687 _S_decrement(_SimdTuple<_Ts...>& __x) 1688 { 1689 __for_each( 1690 __x, [](auto __meta, auto& native) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA { 1691 __meta._S_decrement(native); 1692 }); 1693 } 1694 1695 // compares {{{2 1696 #define _GLIBCXX_SIMD_CMP_OPERATIONS(__cmp) \ 1697 template
\ 1698 _GLIBCXX_SIMD_INTRINSIC static constexpr _MaskMember \ 1699 __cmp(const _SimdTuple<_Tp, _As...>& __x, \ 1700 const _SimdTuple<_Tp, _As...>& __y) \ 1701 { \ 1702 return _M_test([](auto __impl, auto __xx, auto __yy) \ 1703 constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA \ 1704 { return __impl.__cmp(__xx, __yy); }, \ 1705 __x, __y); \ 1706 } 1707 1708 _GLIBCXX_SIMD_CMP_OPERATIONS(_S_equal_to) 1709 _GLIBCXX_SIMD_CMP_OPERATIONS(_S_not_equal_to) 1710 _GLIBCXX_SIMD_CMP_OPERATIONS(_S_less) 1711 _GLIBCXX_SIMD_CMP_OPERATIONS(_S_less_equal) 1712 _GLIBCXX_SIMD_CMP_OPERATIONS(_S_isless) 1713 _GLIBCXX_SIMD_CMP_OPERATIONS(_S_islessequal) 1714 _GLIBCXX_SIMD_CMP_OPERATIONS(_S_isgreater) 1715 _GLIBCXX_SIMD_CMP_OPERATIONS(_S_isgreaterequal) 1716 _GLIBCXX_SIMD_CMP_OPERATIONS(_S_islessgreater) 1717 _GLIBCXX_SIMD_CMP_OPERATIONS(_S_isunordered) 1718 #undef _GLIBCXX_SIMD_CMP_OPERATIONS 1719 1720 // smart_reference access {{{2 1721 template
1722 _GLIBCXX_SIMD_INTRINSIC static constexpr void 1723 _S_set(_SimdTuple<_Tp, _As...>& __v, int __i, _Up&& __x) noexcept 1724 { __v._M_set(__i, static_cast<_Up&&>(__x)); } 1725 1726 // _S_masked_assign {{{2 1727 template
1728 _GLIBCXX_SIMD_INTRINSIC static constexpr void 1729 _S_masked_assign(const _MaskMember __bits, _SimdTuple<_Tp, _As...>& __lhs, 1730 const __type_identity_t<_SimdTuple<_Tp, _As...>>& __rhs) 1731 { 1732 __for_each(__lhs, __rhs, 1733 [&](auto __meta, auto& __native_lhs, auto __native_rhs) 1734 constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA 1735 { 1736 __meta._S_masked_assign(__meta._S_make_mask(__bits), __native_lhs, 1737 __native_rhs); 1738 }); 1739 } 1740 1741 // Optimization for the case where the RHS is a scalar. No need to broadcast 1742 // the scalar to a simd first. 1743 template <typename _Tp, typename... _As>
1744 _GLIBCXX_SIMD_INTRINSIC static constexpr void 1745 _S_masked_assign(const _MaskMember __bits, _SimdTuple<_Tp, _As...>& __lhs, 1746 const __type_identity_t<_Tp> __rhs) 1747 { 1748 __for_each( 1749 __lhs, [&](auto __meta, auto& __native_lhs) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA { 1750 __meta._S_masked_assign(__meta._S_make_mask(__bits), __native_lhs, 1751 __rhs); 1752 }); 1753 } 1754 1755 // _S_masked_cassign {{{2 1756 template <typename _Op, typename _Tp, typename... _As>
1757 static constexpr inline void 1758 _S_masked_cassign(const _MaskMember __bits, _SimdTuple<_Tp, _As...>& __lhs, 1759 const _SimdTuple<_Tp, _As...>& __rhs, _Op __op) 1760 { 1761 __for_each(__lhs, __rhs, 1762 [&](auto __meta, auto& __native_lhs, auto __native_rhs) 1763 constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA 1764 { 1765 __meta.template _S_masked_cassign(__meta._S_make_mask(__bits), 1766 __native_lhs, __native_rhs, __op); 1767 }); 1768 } 1769 1770 // Optimization for the case where the RHS is a scalar. No need to broadcast 1771 // the scalar to a simd first. 1772 template <typename _Op, typename _Tp, typename... _As>
1773 static constexpr inline void 1774 _S_masked_cassign(const _MaskMember __bits, _SimdTuple<_Tp, _As...>& __lhs, 1775 const _Tp& __rhs, _Op __op) 1776 { 1777 __for_each( 1778 __lhs, [&](auto __meta, auto& __native_lhs) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA { 1779 __meta.template _S_masked_cassign(__meta._S_make_mask(__bits), 1780 __native_lhs, __rhs, __op); 1781 }); 1782 } 1783 1784 // _S_masked_unary {{{2 1785 template <template <typename>
class _Op, typename _Tp, typename... _As> 1786 static constexpr inline _SimdTuple<_Tp, _As...> 1787 _S_masked_unary(const _MaskMember __bits, const _SimdTuple<_Tp, _As...>& __v) 1788 { 1789 return __v._M_apply_wrapped([&__bits](auto __meta, 1790 auto __native) constexpr { 1791 return __meta.template _S_masked_unary<_Op>(__meta._S_make_mask( 1792 __bits), 1793 __native); 1794 }); 1795 } 1796 1797 // }}}2 1798 }; 1799 1800 // _MaskImplFixedSize {{{1 1801 template <int _Np, typename>
1802 struct _MaskImplFixedSize 1803 { 1804 static_assert( 1805 sizeof(_ULLong) * __CHAR_BIT__ >= _Np, 1806 "The fixed_size implementation relies on one _ULLong being able to store " 1807 "all boolean elements."); // required in load & store 1808 1809 // member types {{{ 1810 using _Abi = simd_abi::fixed_size<_Np>; 1811 1812 using _MaskMember = _SanitizedBitMask<_Np>; 1813 1814 template <typename _Tp>
1815 using _FirstAbi = typename __fixed_size_storage_t<_Tp, _Np>::_FirstAbi; 1816 1817 template <typename _Tp>
1818 using _TypeTag = _Tp*; 1819 1820 // }}} 1821 // _S_broadcast {{{ 1822 template <typename>
1823 _GLIBCXX_SIMD_INTRINSIC static constexpr _MaskMember 1824 _S_broadcast(bool __x) 1825 { return __x ? ~_MaskMember() : _MaskMember(); } 1826 1827 // }}} 1828 // _S_load {{{ 1829 template <typename>
1830 _GLIBCXX_SIMD_INTRINSIC static constexpr _MaskMember 1831 _S_load(const bool* __mem) 1832 { 1833 if (__builtin_is_constant_evaluated()) 1834 { 1835 _MaskMember __r{}; 1836 for (size_t __i = 0; __i < _Np; ++__i) 1837 __r.set(__i, __mem[__i]); 1838 return __r; 1839 } 1840 using _Ip = __int_for_sizeof_t<bool>
; 1841 // the following load uses element_aligned and relies on __mem already 1842 // carrying alignment information from when this load function was 1843 // called. 1844 const simd<_Ip, _Abi> __bools(reinterpret_cast<const __may_alias<_Ip>
*>( 1845 __mem), 1846 element_aligned); 1847 return __data(__bools != 0); 1848 } 1849 1850 // }}} 1851 // _S_to_bits {{{ 1852 template <bool _Sanitized>
1853 _GLIBCXX_SIMD_INTRINSIC static constexpr _SanitizedBitMask<_Np> 1854 _S_to_bits(_BitMask<_Np, _Sanitized> __x) 1855 { 1856 if constexpr (_Sanitized) 1857 return __x; 1858 else 1859 return __x._M_sanitized(); 1860 } 1861 1862 // }}} 1863 // _S_convert {{{ 1864 template <typename _Tp, typename _Up, typename _UAbi>
1865 _GLIBCXX_SIMD_INTRINSIC static constexpr _MaskMember 1866 _S_convert(simd_mask<_Up, _UAbi> __x) 1867 { 1868 return _UAbi::_MaskImpl::_S_to_bits(__data(__x)) 1869 .template _M_extract<0, _Np>(); 1870 } 1871 1872 // }}} 1873 // _S_from_bitmask {{{2 1874 template <typename _Tp>
1875 _GLIBCXX_SIMD_INTRINSIC static constexpr _MaskMember 1876 _S_from_bitmask(_MaskMember __bits, _TypeTag<_Tp>) noexcept 1877 { return __bits; } 1878 1879 // _S_load {{{2 1880 static constexpr inline _MaskMember 1881 _S_load(const bool* __mem) noexcept 1882 { 1883 // TODO: _UChar is not necessarily the best type to use here. For smaller 1884 // _Np _UShort, _UInt, _ULLong, float, and double can be more efficient. 1885 _ULLong __r = 0; 1886 using _Vs = __fixed_size_storage_t<_UChar, _Np>; 1887 __for_each(_Vs{}, [&](auto __meta, auto) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA { 1888 __r |= __meta._S_mask_to_shifted_ullong( 1889 __meta._S_mask_impl._S_load(&__mem[__meta._S_offset], 1890 _SizeConstant<__meta._S_size()>())); 1891 }); 1892 return __r; 1893 } 1894 1895 // _S_masked_load {{{2 1896 static constexpr inline _MaskMember 1897 _S_masked_load(_MaskMember __merge, _MaskMember __mask, const bool* __mem) noexcept 1898 { 1899 _BitOps::_S_bit_iteration(__mask.to_ullong(), 1900 [&](auto __i) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA { 1901 __merge.set(__i, __mem[__i]); 1902 }); 1903 return __merge; 1904 } 1905 1906 // _S_store {{{2 1907 static constexpr inline void 1908 _S_store(const _MaskMember __bitmask, bool* __mem) noexcept 1909 { 1910 if constexpr (_Np == 1) 1911 __mem[0] = __bitmask[0]; 1912 else 1913 _FirstAbi<_UChar>::_CommonImpl::_S_store_bool_array(__bitmask, __mem); 1914 } 1915 1916 // _S_masked_store {{{2 1917 static constexpr inline void 1918 _S_masked_store(const _MaskMember __v, bool* __mem, const _MaskMember __k) noexcept 1919 { 1920 _BitOps::_S_bit_iteration( 1921 __k, [&](auto __i) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA { __mem[__i] = __v[__i]; }); 1922 } 1923 1924 // logical and bitwise operators {{{2 1925 _GLIBCXX_SIMD_INTRINSIC static constexpr _MaskMember 1926 _S_logical_and(const _MaskMember& __x, const _MaskMember& __y) noexcept 1927 { return __x & __y; } 1928 1929 _GLIBCXX_SIMD_INTRINSIC static constexpr _MaskMember 1930 _S_logical_or(const _MaskMember& __x, const _MaskMember& __y) noexcept 1931 { return __x | __y; } 1932 1933 _GLIBCXX_SIMD_INTRINSIC static constexpr _MaskMember 1934 _S_bit_not(const _MaskMember& __x) noexcept 1935 { return ~__x; } 1936 1937 _GLIBCXX_SIMD_INTRINSIC static constexpr _MaskMember 1938 _S_bit_and(const _MaskMember& __x, const _MaskMember& __y) noexcept 1939 { return __x & __y; } 1940 1941 _GLIBCXX_SIMD_INTRINSIC static constexpr _MaskMember 1942 _S_bit_or(const _MaskMember& __x, const _MaskMember& __y) noexcept 1943 { return __x | __y; } 1944 1945 _GLIBCXX_SIMD_INTRINSIC static constexpr _MaskMember 1946 _S_bit_xor(const _MaskMember& __x, const _MaskMember& __y) noexcept 1947 { return __x ^ __y; } 1948 1949 // smart_reference access {{{2 1950 _GLIBCXX_SIMD_INTRINSIC static constexpr void 1951 _S_set(_MaskMember& __k, int __i, bool __x) noexcept 1952 { __k.set(__i, __x); } 1953 1954 // _S_masked_assign {{{2 1955 _GLIBCXX_SIMD_INTRINSIC static constexpr void 1956 _S_masked_assign(const _MaskMember __k, _MaskMember& __lhs, const _MaskMember __rhs) 1957 { __lhs = (__lhs & ~__k) | (__rhs & __k); } 1958 1959 // Optimization for the case where the RHS is a scalar. 1960 _GLIBCXX_SIMD_INTRINSIC static constexpr void 1961 _S_masked_assign(const _MaskMember __k, _MaskMember& __lhs, const bool __rhs) 1962 { 1963 if (__rhs) 1964 __lhs |= __k; 1965 else 1966 __lhs &= ~__k; 1967 } 1968 1969 // }}}2 1970 // _S_all_of {{{ 1971 template <typename _Tp>
1972 _GLIBCXX_SIMD_INTRINSIC static constexpr bool 1973 _S_all_of(simd_mask<_Tp, _Abi> __k) 1974 { return __data(__k).all(); } 1975 1976 // }}} 1977 // _S_any_of {{{ 1978 template <typename _Tp>
1979 _GLIBCXX_SIMD_INTRINSIC static constexpr bool 1980 _S_any_of(simd_mask<_Tp, _Abi> __k) 1981 { return __data(__k).any(); } 1982 1983 // }}} 1984 // _S_none_of {{{ 1985 template <typename _Tp>
1986 _GLIBCXX_SIMD_INTRINSIC static constexpr bool 1987 _S_none_of(simd_mask<_Tp, _Abi> __k) 1988 { return __data(__k).none(); } 1989 1990 // }}} 1991 // _S_some_of {{{ 1992 template <typename _Tp>
1993 _GLIBCXX_SIMD_INTRINSIC static constexpr bool 1994 _S_some_of([[maybe_unused]] simd_mask<_Tp, _Abi> __k) 1995 { 1996 if constexpr (_Np == 1) 1997 return false; 1998 else 1999 return __data(__k).any() && !__data(__k).all(); 2000 } 2001 2002 // }}} 2003 // _S_popcount {{{ 2004 template <typename _Tp>
2005 _GLIBCXX_SIMD_INTRINSIC static constexpr int 2006 _S_popcount(simd_mask<_Tp, _Abi> __k) 2007 { return __data(__k).count(); } 2008 2009 // }}} 2010 // _S_find_first_set {{{ 2011 template <typename _Tp>
2012 _GLIBCXX_SIMD_INTRINSIC static constexpr int 2013 _S_find_first_set(simd_mask<_Tp, _Abi> __k) 2014 { return std::__countr_zero(__data(__k).to_ullong()); } 2015 2016 // }}} 2017 // _S_find_last_set {{{ 2018 template <typename _Tp>
2019 _GLIBCXX_SIMD_INTRINSIC static constexpr int 2020 _S_find_last_set(simd_mask<_Tp, _Abi> __k) 2021 { return std::__bit_width(__data(__k).to_ullong()) - 1; } 2022 2023 // }}} 2024 }; 2025 // }}}1 2026 2027 _GLIBCXX_SIMD_END_NAMESPACE 2028 #endif // __cplusplus >= 201703L 2029 #endif // _GLIBCXX_EXPERIMENTAL_SIMD_FIXED_SIZE_H_ 2030 2031 // vim: foldmethod=marker sw=2 noet ts=8 sts=2 tw=80
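
The listing above is internal implementation detail; the short sketches that follow are not part of the header and only illustrate, at the public <experimental/simd> level, the behaviour those hooks back. First, the math overloads that _GLIBCXX_SIMD_APPLY_ON_TUPLE forwards chunk by chunk. A minimal sketch assuming GCC's <experimental/simd> (compile with -std=c++17 or later); the alias stdx, the element count 7, and the sample values are illustrative choices, not anything mandated by the header.

#include <experimental/simd>
#include <cstdio>

namespace stdx = std::experimental;

int main()
{
  // An element count that is not a power of two keeps the fixed_size ABI in
  // play: the data is stored as a tuple of smaller native chunks.
  using V = stdx::fixed_size_simd<float, 7>;
  const V x([](int i) { return 0.75f * i - 2.0f; });  // generator constructor
  const V r = stdx::round(x);                         // forwarded per chunk internally
  const V m = stdx::fmin(x, V(0.0f));                 // element-wise minimum
  const V f = stdx::fma(x, x, V(1.0f));               // element-wise fused multiply-add
  for (int i = 0; i < int(V::size()); ++i)
    std::printf("x=% .2f  round=% .0f  fmin=% .2f  fma=% .2f\n",
                x[i], r[i], m[i], f[i]);
}
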
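_S_frexp and _S_remquo above route their integer results into a __fixed_size_storage_t<int, ...>. At the user level that surfaces as the frexp overload taking a pointer to an int simd of the same lane count. A hedged sketch under the same assumptions; the width 5 and the inputs are arbitrary, and rebind_simd_t is used only to spell the matching int simd type.

#include <experimental/simd>
#include <cstdio>

namespace stdx = std::experimental;

int main()
{
  using V  = stdx::fixed_size_simd<double, 5>;
  using IV = stdx::rebind_simd_t<int, V>;   // same lane count, int elements
  const V x([](int i) { return 3.0 * (i + 1); });
  IV exp{};
  const V mant = stdx::frexp(x, &exp);      // exponents are written through the pointer argument
  for (int i = 0; i < int(V::size()); ++i)
    std::printf("%g = %g * 2^%d\n", x[i], mant[i], int(exp[i]));
}
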
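The _GLIBCXX_SIMD_TEST_ON_TUPLE_ block generates the chunk-wise isinf/isfinite/isnan/isnormal/signbit hooks, and the two _S_masked_assign overloads implement write-masking (with a dedicated path for a scalar right-hand side). At the public level that pairs naturally with where(). A sketch with arbitrary sample data, assuming GCC's <experimental/simd>.

#include <experimental/simd>
#include <cstdio>
#include <limits>

namespace stdx = std::experimental;

int main()
{
  using V = stdx::fixed_size_simd<float, 4>;
  V x([](int i) { return i == 2 ? std::numeric_limits<float>::quiet_NaN()
                                : float(i) + 0.5f; });
  const auto bad = stdx::isnan(x);   // simd_mask backed by a bitmask of the lanes
  stdx::where(bad, x) = 0.0f;        // masked assign; the scalar RHS avoids a broadcast
  for (int i = 0; i < int(V::size()); ++i)
    std::printf("lane %d: %.2f (was NaN: %s)\n", i, float(x[i]), bad[i] ? "yes" : "no");
}
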
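_GLIBCXX_SIMD_CMP_OPERATIONS maps every comparison onto the per-chunk implementations and collects the results into one bitmask. From the outside this is simply the ordinary comparison operators on simd, which yield a simd_mask. Sketch under the same assumptions; the data is arbitrary.

#include <experimental/simd>
#include <cstdio>

namespace stdx = std::experimental;

int main()
{
  using V = stdx::fixed_size_simd<int, 6>;
  const V a([](int i) { return i; });
  const V b([](int i) { return 5 - i; });
  const auto lt = a < b;    // backed by the _S_less hook, chunk by chunk
  const auto eq = a == b;   // backed by the _S_equal_to hook
  std::printf("a<b in %d lanes, a==b in %d lanes, any equal: %s\n",
              stdx::popcount(lt), stdx::popcount(eq),
              stdx::any_of(eq) ? "yes" : "no");
}
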
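_S_masked_cassign and _S_masked_unary implement the compound-assignment and increment/decrement forms of write-masking. Their user-facing spelling is a compound operator or ++/-- applied to a where() expression. A hedged sketch with arbitrary data, same assumptions as above.

#include <experimental/simd>
#include <cstdio>

namespace stdx = std::experimental;

int main()
{
  using V = stdx::fixed_size_simd<int, 8>;
  V v([](int i) { return i; });
  const auto odd = (v % 2) != 0;
  stdx::where(odd, v) += 100;   // masked compound assignment with a scalar RHS
  stdx::where(!odd, v)--;       // masked decrement on the remaining lanes
  for (int i = 0; i < int(V::size()); ++i)
    std::printf("%d ", int(v[i]));
  std::printf("\n");
}
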
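_MaskImplFixedSize keeps all _Np lanes in a single _ULLong bitmask and converts to and from arrays of bool in its _S_load/_S_store. That corresponds to constructing a simd_mask from a bool array and copying it back out. A sketch under the same assumptions; the flag element_aligned and the fixed_size_simd_mask alias come from the TS interface, the sample data is arbitrary.

#include <experimental/simd>
#include <algorithm>
#include <cstdio>

namespace stdx = std::experimental;

int main()
{
  constexpr int N = 8;
  using M = stdx::fixed_size_simd_mask<float, N>;
  const bool in[N] = {true, false, true, true, false, false, true, false};
  const M m(in, stdx::element_aligned);    // packs the bools into the internal bitmask
  bool out[N] = {};
  m.copy_to(out, stdx::element_aligned);   // unpacks the bitmask back into bools
  std::printf("bits set: %d, round-trip ok: %s\n", stdx::popcount(m),
              std::equal(in, in + N, out) ? "yes" : "no");
}
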
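Finally, the mask reductions at the end of the listing are thin wrappers over that bitmask: all/any/none/some query the bits, _S_popcount counts them, _S_find_first_set is a count of trailing zeros, and _S_find_last_set is the bit width minus one. A usage sketch under the same assumptions, with an arbitrary predicate.

#include <experimental/simd>
#include <cstdio>

namespace stdx = std::experimental;

int main()
{
  using V = stdx::fixed_size_simd<int, 12>;
  const V v([](int i) { return i; });
  const auto m = v >= 4 && v < 9;   // lanes 4..8 are set
  std::printf("all=%d any=%d none=%d some=%d popcount=%d first=%d last=%d\n",
              stdx::all_of(m), stdx::any_of(m), stdx::none_of(m), stdx::some_of(m),
              stdx::popcount(m), stdx::find_first_set(m), stdx::find_last_set(m));
}
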