Where Online Learning is simpler!

The C and C++ Include Header Files

/usr/include/c++/13/parallel/multiway_mergesort.h


$ cat -n /usr/include/c++/13/parallel/multiway_mergesort.h

     1	// -*- C++ -*-
     2	
     3	// Copyright (C) 2007-2023 Free Software Foundation, Inc.
     4	//
     5	// This file is part of the GNU ISO C++ Library.  This library is free
     6	// software; you can redistribute it and/or modify it under the terms
     7	// of the GNU General Public License as published by the Free Software
     8	// Foundation; either version 3, or (at your option) any later
     9	// version.
    10	
    11	// This library is distributed in the hope that it will be useful, but
    12	// WITHOUT ANY WARRANTY; without even the implied warranty of
    13	// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
    14	// General Public License for more details.
    15	
    16	// Under Section 7 of GPL version 3, you are granted additional
    17	// permissions described in the GCC Runtime Library Exception, version
    18	// 3.1, as published by the Free Software Foundation.
    19	
    20	// You should have received a copy of the GNU General Public License and
    21	// a copy of the GCC Runtime Library Exception along with this program;
    22	// see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
    23	// <http://www.gnu.org/licenses/>.
    24	
    25	/** @file parallel/multiway_mergesort.h
    26	 *  @brief Parallel multiway merge sort.
    27	 *  This file is a GNU parallel extension to the Standard C++ Library.
    28	 */
    29	
    30	// Written by Johannes Singler.
    31	
    32	#ifndef _GLIBCXX_PARALLEL_MULTIWAY_MERGESORT_H
    33	#define _GLIBCXX_PARALLEL_MULTIWAY_MERGESORT_H 1
    34	
    35	#include <vector>
    36	
    37	#include <parallel/basic_iterator.h>
    38	#include <bits/stl_algo.h>
    39	#include <parallel/parallel.h>
    40	#include <parallel/multiway_merge.h>
    41	
    42	namespace __gnu_parallel
    43	{
    44	  /** @brief Subsequence description. */
    45	  template<typename _DifferenceTp>
    46	    struct _Piece
    47	    {
    48	      typedef _DifferenceTp _DifferenceType;
    49	
    50	      /** @brief Begin of subsequence. */
    51	      _DifferenceType _M_begin;
    52	
    53	      /** @brief End of subsequence. */
    54	      _DifferenceType _M_end;
    55	    };
    56	
    57	  /** @brief Data accessed by all threads.
    58	   *
    59	   *  PMWMS = parallel multiway mergesort */
    60	  template<typename _RAIter>
    61	    struct _PMWMSSortingData
    62	    {
    63	      typedef std::iterator_traits<_RAIter> _TraitsType;
    64	      typedef typename _TraitsType::value_type _ValueType;
    65	      typedef typename _TraitsType::difference_type _DifferenceType;
    66	
    67	      /** @brief Number of threads involved. */
    68	      _ThreadIndex _M_num_threads;
    69	
    70	      /** @brief Input __begin. */
    71	      _RAIter _M_source;
    72	
    73	      /** @brief Start indices, per thread. */
    74	      _DifferenceType* _M_starts;
    75	
    76	      /** @brief Storage in which to sort. */
    77	      _ValueType** _M_temporary;
    78	
    79	      /** @brief Samples. */
    80	      _ValueType* _M_samples;
    81	
    82	      /** @brief Offsets to add to the found positions. */
    83	      _DifferenceType* _M_offsets;
    84	
    85	      /** @brief Pieces of data to merge @c [thread][__sequence] */
    86	      std::vector<_Piece<_DifferenceType> >* _M_pieces;
    87	  };
    88	
    89	  /**
    90	   *  @brief Select _M_samples from a sequence.
    91	   *  @param __sd Pointer to algorithm data. _Result will be placed in
    92	   *  @c __sd->_M_samples.
    93	   *  @param __num_samples Number of _M_samples to select.
    94	   */
    95	  template<typename _RAIter, typename _DifferenceTp>
    96	    void
    97	    __determine_samples(_PMWMSSortingData<_RAIter>* __sd,
    98				_DifferenceTp __num_samples)
    99	    {
   100	      typedef std::iterator_traits<_RAIter> _TraitsType;
   101	      typedef typename _TraitsType::value_type _ValueType;
   102	      typedef _DifferenceTp _DifferenceType;
   103	
   104	      _ThreadIndex __iam = omp_get_thread_num();
   105	
   106	      _DifferenceType* __es = new _DifferenceType[__num_samples + 2];
   107	
   108	      __equally_split(__sd->_M_starts[__iam + 1] - __sd->_M_starts[__iam], 
   109			      __num_samples + 1, __es);
   110	
   111	      for (_DifferenceType __i = 0; __i < __num_samples; ++__i)
   112		::new(&(__sd->_M_samples[__iam * __num_samples + __i]))
   113		    _ValueType(__sd->_M_source[__sd->_M_starts[__iam]
   114					       + __es[__i + 1]]);
   115	
   116	      delete[] __es;
   117	    }
   118	
   119	  /** @brief Split consistently. */
   120	  template<bool __exact, typename _RAIter,
   121		   typename _Compare, typename _SortingPlacesIterator>
   122	    struct _SplitConsistently
   123	    { };
   124	
   125	  /** @brief Split by exact splitting. */
   126	  template<typename _RAIter, typename _Compare,
   127		   typename _SortingPlacesIterator>
   128	    struct _SplitConsistently<true, _RAIter, _Compare, _SortingPlacesIterator>
   129	    {
   130	      void
   131	      operator()(const _ThreadIndex __iam,
   132			 _PMWMSSortingData<_RAIter>* __sd,
   133			 _Compare& __comp,
   134			 const typename
   135			 std::iterator_traits<_RAIter>::difference_type
   136			 __num_samples) const
   137	      {
   138	#       pragma omp barrier
   139	
   140		std::vector<std::pair<_SortingPlacesIterator,
   141		                      _SortingPlacesIterator> >
   142		  __seqs(__sd->_M_num_threads);
   143		for (_ThreadIndex __s = 0; __s < __sd->_M_num_threads; __s++)
   144		  __seqs[__s] = std::make_pair(__sd->_M_temporary[__s],
   145					       __sd->_M_temporary[__s]
   146					       + (__sd->_M_starts[__s + 1]
   147						  - __sd->_M_starts[__s]));
   148	
   149		std::vector<_SortingPlacesIterator> __offsets(__sd->_M_num_threads);
   150	
   151		// if not last thread
   152		if (__iam < __sd->_M_num_threads - 1)
   153		  multiseq_partition(__seqs.begin(), __seqs.end(),
   154				     __sd->_M_starts[__iam + 1], __offsets.begin(),
   155				     __comp);
   156	
   157		for (_ThreadIndex __seq = 0; __seq < __sd->_M_num_threads; __seq++)
   158		  {
   159		    // for each sequence
   160		    if (__iam < (__sd->_M_num_threads - 1))
   161		      __sd->_M_pieces[__iam][__seq]._M_end
   162			= __offsets[__seq] - __seqs[__seq].first;
   163		    else
   164		      // very end of this sequence
   165		      __sd->_M_pieces[__iam][__seq]._M_end =
   166			__sd->_M_starts[__seq + 1] - __sd->_M_starts[__seq];
   167		  }
   168	
   169	#       pragma omp barrier
   170	
   171		for (_ThreadIndex __seq = 0; __seq < __sd->_M_num_threads; __seq++)
   172		  {
   173		    // For each sequence.
   174		    if (__iam > 0)
   175		      __sd->_M_pieces[__iam][__seq]._M_begin =
   176			__sd->_M_pieces[__iam - 1][__seq]._M_end;
   177		    else
   178		      // Absolute beginning.
   179		      __sd->_M_pieces[__iam][__seq]._M_begin = 0;
   180		  }
   181	      }
   182	  };
   183	
   184	  /** @brief Split by sampling. */ 
   185	  template<typename _RAIter, typename _Compare,
   186		   typename _SortingPlacesIterator>
   187	    struct _SplitConsistently<false, _RAIter, _Compare, _SortingPlacesIterator>
   188	    {
   189	      void
   190	      operator()(const _ThreadIndex __iam,
   191			 _PMWMSSortingData<_RAIter>* __sd,
   192			 _Compare& __comp,
   193			 const typename
   194			 std::iterator_traits<_RAIter>::difference_type
   195			 __num_samples) const
   196	      {
   197		typedef std::iterator_traits<_RAIter> _TraitsType;
   198		typedef typename _TraitsType::value_type _ValueType;
   199		typedef typename _TraitsType::difference_type _DifferenceType;
   200	
   201		__determine_samples(__sd, __num_samples);
   202	
   203	#       pragma omp barrier
   204	
   205	#       pragma omp single
   206		__gnu_sequential::sort(__sd->_M_samples,
   207				       __sd->_M_samples
   208				       + (__num_samples * __sd->_M_num_threads),
   209				       __comp);
   210	
   211	#       pragma omp barrier
   212	
   213		for (_ThreadIndex __s = 0; __s < __sd->_M_num_threads; ++__s)
   214		  {
   215		    // For each sequence.
   216		    if (__num_samples * __iam > 0)
   217		      __sd->_M_pieces[__iam][__s]._M_begin =
   218	                std::lower_bound(__sd->_M_temporary[__s],
   219					 __sd->_M_temporary[__s]
   220					 + (__sd->_M_starts[__s + 1]
   221					    - __sd->_M_starts[__s]),
   222					 __sd->_M_samples[__num_samples * __iam],
   223					 __comp)
   224	                - __sd->_M_temporary[__s];
   225		    else
   226		      // Absolute beginning.
   227		      __sd->_M_pieces[__iam][__s]._M_begin = 0;
   228	
   229		    if ((__num_samples * (__iam + 1)) <
   230			(__num_samples * __sd->_M_num_threads))
   231		      __sd->_M_pieces[__iam][__s]._M_end =
   232	                std::lower_bound(__sd->_M_temporary[__s],
   233					 __sd->_M_temporary[__s]
   234					 + (__sd->_M_starts[__s + 1]
   235					    - __sd->_M_starts[__s]),
   236					 __sd->_M_samples[__num_samples * (__iam + 1)],
   237					 __comp)
   238	                - __sd->_M_temporary[__s];
   239		    else
   240		      // Absolute end.
   241		      __sd->_M_pieces[__iam][__s]._M_end = (__sd->_M_starts[__s + 1]
   242							    - __sd->_M_starts[__s]);
   243		  }
   244	      }
   245	  };
   246	  
   247	  template<bool __stable, typename _RAIter, typename _Compare>
   248	    struct __possibly_stable_sort
   249	    { };
   250	
   251	  template<typename _RAIter, typename _Compare>
   252	    struct __possibly_stable_sort<true, _RAIter, _Compare>
   253	    {
   254	      void operator()(const _RAIter& __begin,
   255			      const _RAIter& __end, _Compare& __comp) const
   256	      { __gnu_sequential::stable_sort(__begin, __end, __comp); }
   257	    };
   258	
   259	  template<typename _RAIter, typename _Compare>
   260	    struct __possibly_stable_sort<false, _RAIter, _Compare>
   261	    {
   262	      void operator()(const _RAIter __begin,
   263			      const _RAIter __end, _Compare& __comp) const
   264	      { __gnu_sequential::sort(__begin, __end, __comp); }
   265	    };
   266	
   267	  template<bool __stable, typename _Seq_RAIter,
   268		   typename _RAIter, typename _Compare,
   269		   typename _DiffType>
   270	    struct __possibly_stable_multiway_merge
   271	    { };
   272	
   273	  template<typename _Seq_RAIter, typename _RAIter,
   274		   typename _Compare, typename _DiffType>
   275	    struct __possibly_stable_multiway_merge<true, _Seq_RAIter,
   276						    _RAIter, _Compare, _DiffType>
   277	    {
   278	      void operator()(const _Seq_RAIter& __seqs_begin,
   279			      const _Seq_RAIter& __seqs_end,
   280			      const _RAIter& __target,
   281			      _Compare& __comp,
   282			      _DiffType __length_am) const
   283	      { stable_multiway_merge(__seqs_begin, __seqs_end, __target,
   284				      __length_am, __comp, sequential_tag()); }
   285	    };
   286	
   287	  template<typename _Seq_RAIter, typename _RAIter,
   288		   typename _Compare, typename _DiffType>
   289	    struct __possibly_stable_multiway_merge<false, _Seq_RAIter,
   290						    _RAIter, _Compare, _DiffType>
   291	    {
   292	      void operator()(const _Seq_RAIter& __seqs_begin,
   293	                      const _Seq_RAIter& __seqs_end,
   294	                      const _RAIter& __target,
   295	                      _Compare& __comp,
   296	                      _DiffType __length_am) const
   297	      { multiway_merge(__seqs_begin, __seqs_end, __target, __length_am,
   298			       __comp, sequential_tag()); }
   299	    };
   300	
   301	  /** @brief PMWMS code executed by each thread.
   302	   *  @param __sd Pointer to algorithm data.
   303	   *  @param __comp Comparator.
   304	   */
   305	  template<bool __stable, bool __exact, typename _RAIter,
   306		   typename _Compare>
   307	    void
   308	    parallel_sort_mwms_pu(_PMWMSSortingData<_RAIter>* __sd,
   309				  _Compare& __comp)
   310	    {
   311	      typedef std::iterator_traits<_RAIter> _TraitsType;
   312	      typedef typename _TraitsType::value_type _ValueType;
   313	      typedef typename _TraitsType::difference_type _DifferenceType;
   314	
   315	      _ThreadIndex __iam = omp_get_thread_num();
   316	
   317	      // Length of this thread's chunk, before merging.
   318	      _DifferenceType __length_local =
   319		__sd->_M_starts[__iam + 1] - __sd->_M_starts[__iam];
   320	
   321	      // Sort in temporary storage, leave space for sentinel.
   322	
   323	      typedef _ValueType* _SortingPlacesIterator;
   324	
   325	      __sd->_M_temporary[__iam] =
   326	        static_cast<_ValueType*>(::operator new(sizeof(_ValueType)
   327							* (__length_local + 1)));
   328	
   329	      // Copy there.
   330	      std::uninitialized_copy(__sd->_M_source + __sd->_M_starts[__iam],
   331				      __sd->_M_source + __sd->_M_starts[__iam]
   332				      + __length_local,
   333				      __sd->_M_temporary[__iam]);
   334	
   335	      __possibly_stable_sort<__stable, _SortingPlacesIterator, _Compare>()
   336	        (__sd->_M_temporary[__iam],
   337		 __sd->_M_temporary[__iam] + __length_local,
   338	         __comp);
   339	
   340	      // Invariant: locally sorted subsequence in sd->_M_temporary[__iam],
   341	      // __sd->_M_temporary[__iam] + __length_local.
   342	
   343	      // No barrier here: Synchronization is done by the splitting routine.
   344	
   345	      _DifferenceType __num_samples =
   346	        _Settings::get().sort_mwms_oversampling * __sd->_M_num_threads - 1;
   347	      _SplitConsistently<__exact, _RAIter, _Compare, _SortingPlacesIterator>()
   348	        (__iam, __sd, __comp, __num_samples);
   349	
   350	      // Offset from __target __begin, __length after merging.
   351	      _DifferenceType __offset = 0, __length_am = 0;
   352	      for (_ThreadIndex __s = 0; __s < __sd->_M_num_threads; __s++)
   353		{
   354		  __length_am += (__sd->_M_pieces[__iam][__s]._M_end
   355				  - __sd->_M_pieces[__iam][__s]._M_begin);
   356		  __offset += __sd->_M_pieces[__iam][__s]._M_begin;
   357		}
   358	
   359	      typedef std::vector<
   360	        std::pair<_SortingPlacesIterator, _SortingPlacesIterator> >
   361	        _SeqVector;
   362	      _SeqVector __seqs(__sd->_M_num_threads);
   363	
   364	      for (_ThreadIndex __s = 0; __s < __sd->_M_num_threads; ++__s)
   365		{
   366		  __seqs[__s] =
   367		    std::make_pair(__sd->_M_temporary[__s]
   368				   + __sd->_M_pieces[__iam][__s]._M_begin,
   369				   __sd->_M_temporary[__s]
   370				   + __sd->_M_pieces[__iam][__s]._M_end);
   371		}
   372	
   373	      __possibly_stable_multiway_merge<
   374	        __stable, typename _SeqVector::iterator,
   375		_RAIter, _Compare, _DifferenceType>()(__seqs.begin(), __seqs.end(),
   376					     __sd->_M_source + __offset, __comp,
   377					     __length_am);
   378	
   379	#     pragma omp barrier
   380	
   381	      for (_DifferenceType __i = 0; __i < __length_local; ++__i)
   382		__sd->_M_temporary[__iam][__i].~_ValueType();
   383	      ::operator delete(__sd->_M_temporary[__iam]);
   384	    }
   385	
   386	  /** @brief PMWMS main call.
   387	   *  @param __begin Begin iterator of sequence.
   388	   *  @param __end End iterator of sequence.
   389	   *  @param __comp Comparator.
   390	   *  @param __num_threads Number of threads to use.
   391	   */
   392	  template<bool __stable, bool __exact, typename _RAIter,
   393	           typename _Compare>
   394	    void
   395	    parallel_sort_mwms(_RAIter __begin, _RAIter __end,
   396			       _Compare __comp,
   397			       _ThreadIndex __num_threads)
   398	    {
   399	      _GLIBCXX_CALL(__end - __begin)
   400	
   401	      typedef std::iterator_traits<_RAIter> _TraitsType;
   402	      typedef typename _TraitsType::value_type _ValueType;
   403	      typedef typename _TraitsType::difference_type _DifferenceType;
   404	
   405	      _DifferenceType __n = __end - __begin;
   406	
   407	      if (__n <= 1)
   408		return;
   409	
   410	      // at least one element per thread
   411	      if (__num_threads > __n)
   412		__num_threads = static_cast<_ThreadIndex>(__n);
   413	
   414	      // shared variables
   415	      _PMWMSSortingData<_RAIter> __sd;
   416	      _DifferenceType* __starts;
   417	      _DifferenceType __size;
   418	
   419	#     pragma omp parallel num_threads(__num_threads)
   420	      {
   421	        __num_threads = omp_get_num_threads(); //no more threads than requested
   422	
   423	#       pragma omp single
   424		{
   425		  __sd._M_num_threads = __num_threads;
   426		  __sd._M_source = __begin;
   427		  
   428		  __sd._M_temporary = new _ValueType*[__num_threads];
   429	
   430		  if (!__exact)
   431		    {
   432		      __size =
   433			(_Settings::get().sort_mwms_oversampling * __num_threads - 1)
   434			* __num_threads;
   435		      __sd._M_samples = static_cast<_ValueType*>
   436			(::operator new(__size * sizeof(_ValueType)));
   437		    }
   438		  else
   439		    __sd._M_samples = 0;
   440	
   441		  __sd._M_offsets = new _DifferenceType[__num_threads - 1];
   442		  __sd._M_pieces
   443		    = new std::vector<_Piece<_DifferenceType> >[__num_threads];
   444		  for (_ThreadIndex __s = 0; __s < __num_threads; ++__s)
   445		    __sd._M_pieces[__s].resize(__num_threads);
   446		  __starts = __sd._M_starts = new _DifferenceType[__num_threads + 1];
   447	
   448		  _DifferenceType __chunk_length = __n / __num_threads;
   449		  _DifferenceType __split = __n % __num_threads;
   450		  _DifferenceType __pos = 0;
   451		  for (_ThreadIndex __i = 0; __i < __num_threads; ++__i)
   452		    {
   453		      __starts[__i] = __pos;
   454		      __pos += ((__i < __split)
   455				? (__chunk_length + 1) : __chunk_length);
   456		    }
   457		  __starts[__num_threads] = __pos;
   458		} //single
   459	
   460	        // Now sort in parallel.
   461	        parallel_sort_mwms_pu<__stable, __exact>(&__sd, __comp);
   462	      } //parallel
   463	
   464	      delete[] __starts;
   465	      delete[] __sd._M_temporary;
   466	
   467	      if (!__exact)
   468		{
   469		  for (_DifferenceType __i = 0; __i < __size; ++__i)
   470		    __sd._M_samples[__i].~_ValueType();
   471		  ::operator delete(__sd._M_samples);
   472		}
   473	
   474	      delete[] __sd._M_offsets;
   475	      delete[] __sd._M_pieces;
   476	    }
   477	
   478	} //namespace __gnu_parallel
   479	
   480	#endif /* _GLIBCXX_PARALLEL_MULTIWAY_MERGESORT_H */